diff options
Diffstat (limited to 'fpga/usrp3/lib/rfnoc')
246 files changed, 47221 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/rfnoc/.gitignore b/fpga/usrp3/lib/rfnoc/.gitignore new file mode 100644 index 000000000..8b13fff0f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/.gitignore @@ -0,0 +1,15 @@ +noc_shell_tb +moving_sum_tb +noc_dsp_flow_tb +simple_axi_wrapper_tb +simple_fir_tb +schmidl_cox_tb +coregen.log +top_block.py +test.dat +output.dat +isim +#* +axi_wrapper_tb +*.dat +*.sav diff --git a/fpga/usrp3/lib/rfnoc/Makefile.srcs b/fpga/usrp3/lib/rfnoc/Makefile.srcs new file mode 100644 index 000000000..cb97542ce --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/Makefile.srcs @@ -0,0 +1,112 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Brand +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Includes +################################################## +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/xport/Makefile.srcs + +RFNOC_FRAMEWORK_SRCS = $(RFNOC_CORE_SRCS) $(RFNOC_XBAR_SRCS) $(RFNOC_UTIL_SRCS) $(RFNOC_XPORT_SRCS) + +################################################## +# RFNoC Sources +################################################## +RFNOC_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/, \ +chdr_fifo_large.v \ +chdr_framer.v \ +chdr_framer_2clk.v \ +chdr_deframer.v \ +chdr_deframer_2clk.v \ +chdr_pkt_types.vh \ +axi_packet_mux.v \ +axi_wrapper.v \ +axi_bit_reduce.v \ +null_source.v \ +split_stream.v \ +split_stream_fifo.v \ +conj.v \ +delay_fifo.v \ +delay_type2.v \ +delay_type3.v \ +delay_type4.v \ +complex_to_magsq.v \ +phase_accum.v \ +complex_invert.v \ +periodic_framer.v \ +moving_sum.v \ +counter.v \ +ram_to_fifo.v \ +const.v \ +const_sreg.v \ +cmul.v \ +cadd.v \ +keep_one_in_n.v \ +vector_iir.v \ +addsub.v \ +packet_resizer.v \ +axi_pipe.v \ +multiply.v \ +mult.v \ +mult_add.v \ +mult_rc.v \ +mult_add_rc.v \ +fft_shift.v \ 
+axi_pipe_join.v \ +axi_pipe_mac.v \ +axi_round_and_clip_complex.v \ +axi_round_complex.v \ +axi_clip_complex.v \ +axi_join.v \ +axi_sync.v \ +split_complex.v \ +axi_round_and_clip.v \ +join_complex.v \ +axi_round.v \ +axi_clip.v \ +axi_clip_unsigned.v \ +axi_serializer.v \ +axi_deserializer.v \ +axi_packer.v \ +complex_to_mag_approx.v \ +file_source.v \ +fosphor/delay.v \ +fosphor/fifo_srl.v \ +fosphor/rng.v \ +fosphor/f15_avg.v \ +fosphor/f15_binmap.v \ +fosphor/f15_core.v \ +fosphor/f15_eoseq.v \ +fosphor/f15_histo_mem.v \ +fosphor/f15_line_mem.v \ +fosphor/f15_logpwr.v \ +fosphor/f15_maxhold.v \ +fosphor/f15_packetizer.v \ +fosphor/f15_rise_decay.v \ +fosphor/f15_wf_agg.v \ +fosphor/axi_logpwr.v \ +cvita_hdr_parser.v \ +cvita_hdr_encoder.v \ +cvita_hdr_decoder.v \ +cvita_hdr_modify.v \ +axi_async_stream.v \ +axi_rate_change.v \ +axi_tag_time.v \ +axi_drop_partial_packet.v \ +ddc.v \ +duc.v \ +cic_decimate.v \ +cic_interpolate.v \ +axi_fir_filter.v \ +fir_filter_slice.v \ +axi_fir_filter_dec.v \ +addsub.vhd \ +dds_freq_tune.v \ +dds_timed.v \ +datapath_gatekeeper.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/addsub.v b/fpga/usrp3/lib/rfnoc/addsub.v new file mode 100644 index 000000000..46492bf8f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/addsub.v @@ -0,0 +1,41 @@ +// +// Copyright 2013 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Demonstration of two input, two output block + +module addsub + #(parameter WIDTH = 16) + (input clk, input reset, + input [WIDTH*2-1:0] i0_tdata, input i0_tlast, input i0_tvalid, output i0_tready, + input [WIDTH*2-1:0] i1_tdata, input i1_tlast, input i1_tvalid, output i1_tready, + output [WIDTH*2-1:0] sum_tdata, output sum_tlast, output sum_tvalid, input sum_tready, + output [WIDTH*2-1:0] diff_tdata, output diff_tlast, output diff_tvalid, input diff_tready); + + wire [WIDTH*4-1:0] dummy; + wire [WIDTH*4-1:0] int_tdata; + wire int_tlast, 
int_tvalid, int_tready; + + assign int_tvalid = i0_tvalid & i1_tvalid; + assign i0_tready = int_tvalid & int_tready; + assign i1_tready = int_tvalid & int_tready; + + wire [WIDTH-1:0] sum_a = i0_tdata[WIDTH*2-1:WIDTH] + i1_tdata[WIDTH*2-1:WIDTH]; + wire [WIDTH-1:0] diff_a = i0_tdata[WIDTH*2-1:WIDTH] - i1_tdata[WIDTH*2-1:WIDTH]; + + wire [WIDTH-1:0] sum_b = i0_tdata[WIDTH-1:0] + i1_tdata[WIDTH-1:0]; + wire [WIDTH-1:0] diff_b = i0_tdata[WIDTH-1:0] - i1_tdata[WIDTH-1:0]; + + assign int_tdata = { sum_a,sum_b,diff_a,diff_b }; + assign int_tlast = i0_tlast; // Follow first input. + + split_stream_fifo #(.WIDTH(4*WIDTH), .ACTIVE_MASK(4'b0011)) splitter + (.clk(clk), .reset(reset), .clear(1'b0), + .i_tdata(int_tdata), .i_tlast(int_tlast), .i_tvalid(int_tvalid), .i_tready(int_tready), + .o0_tdata({sum_tdata,dummy[WIDTH*2-1:0]}), .o0_tlast(sum_tlast), .o0_tvalid(sum_tvalid), .o0_tready(sum_tready), + .o1_tdata({dummy[WIDTH*4-1:WIDTH*2],diff_tdata}), .o1_tlast(diff_tlast), .o1_tvalid(diff_tvalid), .o1_tready(diff_tready), + .o2_tready(1'b1), .o3_tready(1'b1)); + +endmodule // addsub diff --git a/fpga/usrp3/lib/rfnoc/addsub.vhd b/fpga/usrp3/lib/rfnoc/addsub.vhd new file mode 100644 index 000000000..3f93db028 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/addsub.vhd @@ -0,0 +1,142 @@ +-- +-- Copyright 2015 National Instruments +-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity addsub_vhdl is + generic ( + width_g : natural := 16); + port ( + clk_i : in std_ulogic; + rst_i : in std_ulogic; + + i0_tdata : in std_ulogic_vector(width_g * 2 - 1 downto 0); + i0_tlast : in std_ulogic; + i0_tvalid : in std_ulogic; + i0_tready : out std_ulogic; + + i1_tdata : in std_ulogic_vector(width_g * 2 - 1 downto 0); + i1_tlast : in std_ulogic; + i1_tvalid : in std_ulogic; + i1_tready : out std_ulogic; + + sum_tdata : out std_ulogic_vector(width_g * 2 - 1 downto 0); + sum_tlast : out std_ulogic; + sum_tvalid : out std_ulogic; + sum_tready : in std_ulogic; + + 
diff_tdata : out std_ulogic_vector(width_g * 2 - 1 downto 0); + diff_tlast : out std_ulogic; + diff_tvalid : out std_ulogic; + diff_tready : in std_ulogic); +end entity addsub_vhdl; + +architecture rtl of addsub_vhdl is + + component split_stream_fifo is + generic ( + WIDTH : natural := 16; + ACTIVE_MASK : std_ulogic_vector(3 downto 0); + FIFO_SIZE : natural := 6); + port ( + clk : in std_ulogic; + reset : in std_ulogic; + clear : in std_ulogic; + i_tdata : in std_ulogic_vector(WIDTH - 1 downto 0); + i_tlast : in std_ulogic; + i_tvalid : in std_ulogic; + i_tready : out std_ulogic; + o0_tdata : out std_ulogic_vector(WIDTH - 1 downto 0); + o0_tlast : out std_ulogic; + o0_tvalid : out std_ulogic; + o0_tready : in std_ulogic; + o1_tdata : out std_ulogic_vector(WIDTH - 1 downto 0); + o1_tlast : out std_ulogic; + o1_tvalid : out std_ulogic; + o1_tready : in std_ulogic; + o2_tdata : out std_ulogic_vector(WIDTH - 1 downto 0); + o2_tlast : out std_ulogic; + o2_tvalid : out std_ulogic; + o2_tready : in std_ulogic; + o3_tdata : out std_ulogic_vector(WIDTH - 1 downto 0); + o3_tlast : out std_ulogic; + o3_tvalid : out std_ulogic; + o3_tready : in std_ulogic); + end component split_stream_fifo; + + signal sum_a : unsigned(width_g - 1 downto 0); + signal sum_b : unsigned(width_g - 1 downto 0); + + signal diff_a : unsigned(width_g - 1 downto 0); + signal diff_b : unsigned(width_g - 1 downto 0); + + signal int_tdata : std_ulogic_vector(width_g * 4 - 1 downto 0); + signal int_tlast : std_ulogic; + signal int_tvalid : std_ulogic; + signal int_tready : std_ulogic; + + signal sum : std_ulogic_vector(width_g * 4 - 1 downto 0); + signal diff : std_ulogic_vector(width_g * 4 - 1 downto 0); + +begin + + i0_tready <= int_tvalid and int_tready; + i1_tready <= int_tvalid and int_tready; + + sum_a <= unsigned(i0_tdata(width_g * 2 - 1 downto width_g)) + + unsigned(i1_tdata(width_g * 2 - 1 downto width_g)); + + sum_b <= unsigned(i0_tdata(width_g - 1 downto 0)) + + unsigned(i1_tdata(width_g - 1 
downto 0)); + + diff_a <= unsigned(i0_tdata(width_g * 2 - 1 downto width_g)) - + unsigned(i1_tdata(width_g * 2 - 1 downto width_g)); + + diff_b <= unsigned(i0_tdata(width_g - 1 downto 0)) - + unsigned(i1_tdata(width_g - 1 downto 0)); + + int_tdata <= std_ulogic_vector(sum_a) & + std_ulogic_vector(sum_b) & + std_ulogic_vector(diff_a) & + std_ulogic_vector(diff_b); + + int_tlast <= i0_tlast; -- Follow first input + int_tvalid <= i0_tvalid and i1_tvalid; + + splitter : split_stream_fifo + generic map ( + WIDTH => 4 * width_g, + ACTIVE_MASK => "0011", + FIFO_SIZE => 6) + port map ( + clk => clk_i, + reset => rst_i, + clear => '0', + i_tdata => int_tdata, + i_tlast => int_tlast, + i_tvalid => int_tvalid, + i_tready => int_tready, + o0_tdata => sum, + o0_tlast => sum_tlast, + o0_tvalid => sum_tvalid, + o0_tready => sum_tready, + o1_tdata => diff, + o1_tlast => diff_tlast, + o1_tvalid => diff_tvalid, + o1_tready => diff_tready, + o2_tdata => open, + o2_tlast => open, + o2_tvalid => open, + o2_tready => '1', + o3_tdata => open, + o3_tlast => open, + o3_tvalid => open, + o3_tready => '1'); + + sum_tdata <= sum(sum'high downto width_g * 2); + diff_tdata <= diff(width_g * 2 - 1 downto diff'low); + +end architecture rtl; diff --git a/fpga/usrp3/lib/rfnoc/axi_async_stream.v b/fpga/usrp3/lib/rfnoc/axi_async_stream.v new file mode 100644 index 000000000..ee4746436 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_async_stream.v @@ -0,0 +1,204 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// - Tracks and fills out header information for an axi stream that is +// asynchronous or does not have a 1:1 input / output ratio. +// - User must pass through **ALL** received words and use the tkeep +// signal to flag which words to keep. +// - This module is not intended to work with decimation / interpolation blocks. 
+// +// Open design questions: +// - If a tkeep burst occurs between packet boundaries, an internal tlast is +// generated splitting the burst up into two (or more) packets. This is +// an easy way to make sure the packet sizes are bounded and the VITA +// time is correct. Is this desirable, since the downstream block +// will likely want the full burst and is then forced to aggregate packets? +// + +module axi_async_stream #( + parameter WIDTH = 32, + parameter HEADER_WIDTH = 128, + parameter HEADER_FIFO_SIZE = 5, + parameter MAX_TICK_RATE = 2**16-1) +( + input clk, + input reset, + input clear, + input [15:0] src_sid, + input [15:0] dst_sid, + input [$clog2(MAX_TICK_RATE)-1:0] tick_rate, + output header_fifo_full, + // From AXI Wrapper + input [WIDTH-1:0] s_axis_data_tdata, + input [HEADER_WIDTH-1:0] s_axis_data_tuser, + input s_axis_data_tlast, + input s_axis_data_tvalid, + output s_axis_data_tready, + // To AXI Wrapper + output [WIDTH-1:0] m_axis_data_tdata, + output [HEADER_WIDTH-1:0] m_axis_data_tuser, + output m_axis_data_tlast, + output m_axis_data_tvalid, + input m_axis_data_tready, + // To User + output [WIDTH-1:0] o_tdata, + output o_tlast, + output o_tvalid, + input o_tready, + // From User + input [WIDTH-1:0] i_tdata, + input i_tlast, + input i_tvalid, + input i_tkeep, + output i_tready +); + + wire [WIDTH-1:0] i_reg_tdata; + wire i_reg_tvalid, i_reg_tlast, i_reg_tkeep, i_reg_tready; + + reg [WIDTH-1:0] pipe_tdata; + reg pipe_tvalid, pipe_tlast, pipe_tkeep; + wire pipe_tready; + + /******************************************************** + ** Register user input + ** - The output logic in some cases needs to wait for + ** i_tvalid to assert before asserting i_tready. + ** However, users may implement logic that waits for + ** i_tready to assert before asserting i_tvalid. + ** Without this register, that would result in a + ** deadlock. 
+ ** - Note: Technically, the user waiting for i_tready + ** violates the AXI specification that a master cannot + ** wait for ready from the slave. However, it is common + ** for users to accidentally break this rule and this is + ** a cheap workaround. + ********************************************************/ + axi_fifo_flop #(.WIDTH(WIDTH+2)) axi_fifo_flop ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tkeep,i_tlast,i_tdata}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({i_reg_tkeep,i_reg_tlast,i_reg_tdata}), .o_tvalid(i_reg_tvalid), .o_tready(i_reg_tready)); + + /******************************************************** + ** Keep track of headers for user + ********************************************************/ + wire header_in_tready, header_in_tvalid, header_out_tvalid, header_out_tready; + wire [HEADER_WIDTH-1:0] header_in_tdata, header_out_tdata; + + reg first_word = 1'b1; + reg [15:0] word_cnt; + reg [16+$clog2(MAX_TICK_RATE)-1:0] time_cnt; // 16 bit payload length + max tick rate increment + + wire [63:0] vita_time; + wire [15:0] payload_length; + + // Track first word to make sure header is read only once per packet + always @(posedge clk) begin + if (reset | clear) begin + first_word <= 1'b1; + end else begin + if (s_axis_data_tvalid & s_axis_data_tready) begin + if (s_axis_data_tlast) begin + first_word <= 1'b1; + end else if (first_word) begin + first_word <= 1'b0; + end + end + end + end + + // Header FIFO + axi_fifo #(.WIDTH(HEADER_WIDTH), .SIZE(HEADER_FIFO_SIZE)) axi_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata(header_in_tdata), .i_tvalid(header_in_tvalid), .i_tready(header_in_tready), + .o_tdata(header_out_tdata), .o_tvalid(header_out_tvalid), .o_tready(header_out_tready), + .space(), .occupied()); + + assign header_in_tdata = s_axis_data_tuser; + assign header_in_tvalid = s_axis_data_tvalid & o_tready & first_word; + assign header_out_tready = i_reg_tvalid & i_reg_tready & (word_cnt >= payload_length); + 
assign header_fifo_full = ~header_in_tready; + + // Track VITA time offset and word count for emptying header FIFO + always @(posedge clk) begin + if (reset | clear) begin + word_cnt <= WIDTH/8; + time_cnt <= 0; + end else begin + if (pipe_tvalid & pipe_tready) begin + if (word_cnt >= payload_length) begin + word_cnt <= WIDTH/8; + time_cnt <= 0; + end else begin + word_cnt <= word_cnt + WIDTH/8; + time_cnt <= time_cnt + tick_rate; + end + end + end + end + + // Form output header + cvita_hdr_decoder cvita_hdr_decoder ( + .header(header_out_tdata), + .pkt_type(), .eob(), .has_time(), + .seqnum(), .payload_length(payload_length), + .src_sid(), .dst_sid(), + .vita_time(vita_time)); + + cvita_hdr_modify cvita_hdr_modify ( + .header_in(header_out_tdata), + .header_out(m_axis_data_tuser), + .use_pkt_type(1'b0), .pkt_type(), + .use_has_time(1'b0), .has_time(), + .use_eob(1'b0), .eob(), + .use_seqnum(1'b0), .seqnum(), // AXI Wrapper handles this + .use_length(1'b0), .length(), // AXI Wrapper handles this + .use_payload_length(1'b0), .payload_length(), + .use_src_sid(1'b1), .src_sid(src_sid), + .use_dst_sid(1'b1), .dst_sid(dst_sid), + .use_vita_time(1'b1), .vita_time(vita_time + time_cnt)); + + /******************************************************** + ** Data to user from AXI Wrapper + ** - Throttles if header FIFO is full + ********************************************************/ + assign o_tdata = s_axis_data_tdata; + assign o_tvalid = s_axis_data_tvalid & header_in_tready; + assign o_tlast = s_axis_data_tlast; + assign s_axis_data_tready = o_tready & header_in_tready; + + /******************************************************** + ** Data from user to AXI Wrapper + ** - Handles asserting tlast + ** - Asserts tlast in three cases: + ** 1) User asserts tlast + ** 2) End of a burst of samples (i.e. when tkeep deasserts). 
+ ** 3) End of a packet, in case VITA is different between packets + ********************************************************/ + wire ready; + always @(posedge clk) begin + if (reset | clear) begin + pipe_tdata <= 'd0; + pipe_tvalid <= 1'b0; + pipe_tlast <= 1'b0; + pipe_tkeep <= 1'b0; + end else begin + if (pipe_tready) begin + pipe_tdata <= i_reg_tdata; + pipe_tvalid <= i_reg_tvalid; + pipe_tlast <= i_reg_tlast; + pipe_tkeep <= i_reg_tkeep; + end + end + end + + assign pipe_tready = ~pipe_tvalid | (m_axis_data_tready & header_out_tvalid & (i_reg_tvalid | (m_axis_data_tvalid & m_axis_data_tlast))); + assign i_reg_tready = pipe_tready; + assign m_axis_data_tdata = pipe_tdata; + assign m_axis_data_tvalid = pipe_tvalid & pipe_tkeep & (i_reg_tvalid | m_axis_data_tlast) & header_out_tvalid; + assign m_axis_data_tlast = pipe_tlast | (i_reg_tvalid & ~i_reg_tkeep) | (word_cnt >= payload_length); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/axi_bit_reduce.v b/fpga/usrp3/lib/rfnoc/axi_bit_reduce.v new file mode 100644 index 000000000..cb6290ff0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_bit_reduce.v @@ -0,0 +1,27 @@ +// +// Copyright Ettus Research, 2014 +// Copyright 2014 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// no logic, just wires + +module axi_bit_reduce + #(parameter WIDTH_IN=48, + parameter WIDTH_OUT=25, + parameter DROP_TOP=6, + parameter VECTOR_WIDTH=1) // vector_width = 2 for complex, 1 for real + (input [VECTOR_WIDTH*WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [VECTOR_WIDTH*WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + genvar i; + generate + for(i=0; i<VECTOR_WIDTH; i=i+1) + assign o_tdata[(i+1)*WIDTH_OUT-1:i*WIDTH_OUT] = i_tdata[(i+1)*WIDTH_IN-DROP_TOP-1:i*WIDTH_IN+(WIDTH_IN-WIDTH_OUT)-DROP_TOP]; + endgenerate + + assign o_tlast = i_tlast; + assign o_tvalid = i_tvalid; + assign i_tready = o_tready; + +endmodule // axi_bit_sel 
diff --git a/fpga/usrp3/lib/rfnoc/axi_clip.v b/fpga/usrp3/lib/rfnoc/axi_clip.v
new file mode 100644
index 000000000..4ae776f95
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_clip.v
@@ -0,0 +1,58 @@
+//
+// Copyright 2016 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Narrows a two's-complement sample from WIDTH_IN to WIDTH_OUT bits,
+// saturating to the largest positive or negative WIDTH_OUT-bit value
+// when the input does not fit. Equal widths give a plain pass-through.
+//
+
+module axi_clip #(
+  parameter WIDTH_IN  = 24,
+  parameter WIDTH_OUT = 16,
+  parameter FIFOSIZE  = 0   // leave at 0 for a normal single flop
+)(
+  input                  clk,
+  input                  reset,
+  input  [WIDTH_IN-1:0]  i_tdata,
+  input                  i_tlast,
+  input                  i_tvalid,
+  output                 i_tready,
+  output [WIDTH_OUT-1:0] o_tdata,
+  output                 o_tlast,
+  output                 o_tvalid,
+  input                  o_tready
+);
+
+  generate
+    if (WIDTH_IN != WIDTH_OUT) begin
+      // Input sign bit down to the output sign position: the value fits
+      // only when these bits are all zeros or all ones.
+      wire [WIDTH_IN-WIDTH_OUT:0] upper_bits = i_tdata[WIDTH_IN-1:WIDTH_OUT-1];
+      wire any_set  = |upper_bits;
+      wire all_set  = &upper_bits;
+      wire saturate = any_set & ~all_set;
+
+      // Saturation value is chosen by the input sign bit.
+      wire [WIDTH_OUT-1:0] most_negative = {1'b1,{(WIDTH_OUT-1){1'b0}}};
+      wire [WIDTH_OUT-1:0] most_positive = {1'b0,{(WIDTH_OUT-1){1'b1}}};
+      wire [WIDTH_OUT-1:0] rail    = i_tdata[WIDTH_IN-1] ? most_negative : most_positive;
+      wire [WIDTH_OUT-1:0] clipped = saturate ? rail : i_tdata[WIDTH_OUT-1:0];
+
+      // tlast is carried through the FIFO next to the clipped sample.
+      axi_fifo #(.WIDTH(WIDTH_OUT+1), .SIZE(FIFOSIZE)) flop (
+        .clk(clk), .reset(reset), .clear(1'b0),
+        .i_tdata({i_tlast, clipped}), .i_tvalid(i_tvalid), .i_tready(i_tready),
+        .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+        .occupied(), .space());
+    end else begin
+      // Nothing to clip: wire the stream straight through.
+      assign o_tdata  = i_tdata;
+      assign o_tlast  = i_tlast;
+      assign o_tvalid = i_tvalid;
+      assign i_tready = o_tready;
+    end
+  endgenerate
+
+endmodule // axi_clip
diff --git a/fpga/usrp3/lib/rfnoc/axi_clip_complex.v b/fpga/usrp3/lib/rfnoc/axi_clip_complex.v
new file mode 100644
index 000000000..439345990
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_clip_complex.v
@@ -0,0 +1,58 @@
+//
+// Copyright 2016 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Complex wrapper around axi_clip: split_complex turns the 2*WIDTH_IN-bit
+// input into two WIDTH_IN-bit streams, each one is clipped to WIDTH_OUT
+// bits by its own axi_clip instance, and join_complex merges the results.
+//
+
+module axi_clip_complex #(
+  parameter WIDTH_IN  = 24,
+  parameter WIDTH_OUT = 16,
+  parameter FIFOSIZE  = 0   // leave at 0 for a single flop
+)(
+  input                    clk,
+  input                    reset,
+  input  [2*WIDTH_IN-1:0]  i_tdata,
+  input                    i_tlast,
+  input                    i_tvalid,
+  output                   i_tready,
+  output [2*WIDTH_OUT-1:0] o_tdata,
+  output                   o_tlast,
+  output                   o_tvalid,
+  input                    o_tready
+);
+
+  // Full-width streams coming out of split_complex
+  wire [WIDTH_IN-1:0] wide_i_tdata, wide_q_tdata;
+  wire wide_i_tlast, wide_i_tvalid, wide_i_tready;
+  wire wide_q_tlast, wide_q_tvalid, wide_q_tready;
+
+  // Clipped streams going into join_complex
+  wire [WIDTH_OUT-1:0] clip_i_tdata, clip_q_tdata;
+  wire clip_i_tlast, clip_i_tvalid, clip_i_tready;
+  wire clip_q_tlast, clip_q_tvalid, clip_q_tready;
+
+  split_complex #(.WIDTH(WIDTH_IN)) split_complex (
+    .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready),
+    .oi_tdata(wide_i_tdata), .oi_tlast(wide_i_tlast), .oi_tvalid(wide_i_tvalid), .oi_tready(wide_i_tready),
+    .oq_tdata(wide_q_tdata), .oq_tlast(wide_q_tlast), .oq_tvalid(wide_q_tvalid), .oq_tready(wide_q_tready));
+
+  axi_clip #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .FIFOSIZE(FIFOSIZE)) axi_clip_i (
+    .clk(clk), .reset(reset),
+    .i_tdata(wide_i_tdata), .i_tlast(wide_i_tlast), .i_tvalid(wide_i_tvalid), .i_tready(wide_i_tready),
+    .o_tdata(clip_i_tdata), .o_tlast(clip_i_tlast), .o_tvalid(clip_i_tvalid), .o_tready(clip_i_tready));
+
+  axi_clip #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .FIFOSIZE(FIFOSIZE)) axi_clip_q (
+    .clk(clk), .reset(reset),
+    .i_tdata(wide_q_tdata), .i_tlast(wide_q_tlast), .i_tvalid(wide_q_tvalid), .i_tready(wide_q_tready),
+    .o_tdata(clip_q_tdata), .o_tlast(clip_q_tlast), .o_tvalid(clip_q_tvalid), .o_tready(clip_q_tready));
+
+  join_complex #(.WIDTH(WIDTH_OUT)) join_complex (
+    .ii_tdata(clip_i_tdata), .ii_tlast(clip_i_tlast), .ii_tvalid(clip_i_tvalid), .ii_tready(clip_i_tready),
+    .iq_tdata(clip_q_tdata), .iq_tlast(clip_q_tlast), .iq_tvalid(clip_q_tvalid), .iq_tready(clip_q_tready),
+    .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready));
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/axi_clip_unsigned.v b/fpga/usrp3/lib/rfnoc/axi_clip_unsigned.v
new file mode 100644
index 000000000..4ad393693
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_clip_unsigned.v
@@ -0,0 +1,24 @@
+//
+// Copyright 2015, Ettus Research
+//
+// Reduces bit width by removing MSBs. Input assumed to be unsigned.
+
+module axi_clip_unsigned
+  #(parameter WIDTH_IN=24,
+    parameter WIDTH_OUT=16,
+    parameter FIFOSIZE=0)  // leave at 0 for a normal single flop
+   (input clk, input reset,
+    input [WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
+    output [WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);
+  // Any bit set above the retained LSBs means the value does not fit.
+  wire overflow = |i_tdata[WIDTH_IN-1:WIDTH_OUT];
+  // NOTE(review): saturates to 0111...1 (top bit left clear), not all ones -- confirm intended.
+  wire [WIDTH_OUT-1:0] out = overflow ? {1'b0,{(WIDTH_OUT-1){1'b1}}} : i_tdata[WIDTH_OUT-1:0];
+
+  axi_fifo #(.WIDTH(WIDTH_OUT+1), .SIZE(FIFOSIZE)) flop
+    (.clk(clk), .reset(reset), .clear(1'b0),
+     .i_tdata({i_tlast, out}), .i_tvalid(i_tvalid), .i_tready(i_tready),
+     .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+     .occupied(), .space());
+
+endmodule // axi_clip_unsigned
diff --git a/fpga/usrp3/lib/rfnoc/axi_deserializer.v b/fpga/usrp3/lib/rfnoc/axi_deserializer.v
new file mode 100644
index 000000000..ba2736315
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_deserializer.v
@@ -0,0 +1,70 @@
+//
+// Copyright 2015 Ettus Research LLC
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Assembles a 1-bit AXI-Stream into WIDTH-bit words. The first bit of a word
+// is stored in bit WIDTH-1 and the last in bit 0; reverse_output mirrors the
+// bit order of the word that is emitted. i_tlast on any bit of a word marks
+// that word as the last one.
+//
+
+module axi_deserializer #(
+  parameter WIDTH = 32)
+(
+  input clk, input rst, input reverse_output,
+  input i_tdata, input i_tlast, input i_tvalid, output i_tready,
+  output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
+);
+
+  reg flop_tlast, flop_tlast_latch;
+  reg [WIDTH-1:0] flop_tdata;
+  wire [WIDTH-1:0] flop_tdata_reverse, flop_tdata_int;
+  reg flop_tvalid;
+  reg [$clog2(WIDTH)-1:0] i;  // Bit position written by the next input bit
+
+  always @(posedge clk) begin
+    if (rst) begin
+      flop_tdata       <= 'd0;
+      flop_tvalid      <= 1'b0;
+      flop_tlast       <= 1'b0;
+      flop_tlast_latch <= 1'b0;
+      i                <= WIDTH-1;
+    end else begin
+      flop_tvalid <= 1'b0;  // Default; pulses when a word completes
+      if (i_tvalid & i_tready) begin
+        flop_tdata[i] <= i_tdata;
+        if (i_tlast) begin
+          flop_tlast_latch <= 1'b1;
+        end
+        if (i == 0) begin
+          flop_tvalid      <= 1'b1;
+          // The latch only remembers tlast from earlier bits of this word, so
+          // OR in i_tlast of the final bit or it would be dropped.
+          flop_tlast       <= flop_tlast_latch | i_tlast;
+          flop_tlast_latch <= 1'b0;
+          i                <= WIDTH-1;
+        end else begin
+          i                <= i - 1;
+        end
+      end
+    end
+  end
+
+  // Reverse flop_tdata
+  genvar k;
+  generate
+    for (k = 0; k < WIDTH; k = k + 1) assign flop_tdata_reverse[WIDTH-1-k] = flop_tdata[k];
+  endgenerate
+
+  assign flop_tdata_int = reverse_output ? flop_tdata_reverse : flop_tdata;
+
+  // FIFO is WIDTH+1 wide so that tlast travels with the data word
+  axi_fifo_flop2 #(.WIDTH(WIDTH+1)) axi_fifo_flop (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata({flop_tlast,flop_tdata_int}), .i_tvalid(flop_tvalid), .i_tready(i_tready),
+    .o_tdata({o_tlast,o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+    .space(), .occupied());
+
+endmodule
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/axi_drop_partial_packet.v b/fpga/usrp3/lib/rfnoc/axi_drop_partial_packet.v
new file mode 100644
index 000000000..a4dbae654
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_drop_partial_packet.v
@@ -0,0 +1,150 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Drop packets that are larger or smaller than the allowed packet size.
+// With HOLD_LAST_WORD set, the final word of each good packet is kept back
+// until another good packet has been received, or until an input error or
+// flush releases it.
+//
+
+module axi_drop_partial_packet #(
+  parameter WIDTH = 32,
+  parameter MAX_PKT_SIZE = 1024,
+  parameter HOLD_LAST_WORD = 0, // Hold off sending last word until next full packet arrives
+  parameter SR_PKT_SIZE_ADDR = 1
+)(
+  input clk, input reset, input clear,
+  input flush,  // If using HOLD_LAST_WORD, will forcibly release all words in FIFO
+  input set_stb, input [7:0] set_addr, input [31:0] set_data,
+  input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
+  output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
+);
+
+  generate
+    // Packet size of 1 means it is impossible to form a partial packet, so this module does nothing...
+    if (MAX_PKT_SIZE == 1) begin
+      assign o_tdata  = i_tdata;
+      assign o_tlast  = i_tlast;
+      assign o_tvalid = i_tvalid;
+      assign i_tready = o_tready;
+    // All other packet sizes
+    end else begin
+      // Settings register
+      wire [$clog2(MAX_PKT_SIZE+1)-1:0] sr_pkt_size;
+      setting_reg #(.my_addr(SR_PKT_SIZE_ADDR), .width($clog2(MAX_PKT_SIZE+1)), .at_reset(1)) set_pkt_size (
+        .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
+        .out(sr_pkt_size), .changed());
+
+      // Do not change n unless block is not active
+      reg active;
+      reg [$clog2(MAX_PKT_SIZE+1)-1:0] pkt_size = 1;
+      always @(posedge clk) begin
+        if (reset | clear) begin
+          active <= 1'b0;
+        end else begin
+          if (i_tready & i_tvalid) begin
+            active <= 1'b1;
+          end
+        end
+        if (clear | ~active) begin
+          pkt_size <= (sr_pkt_size == 0) ? 1 : sr_pkt_size;
+        end
+      end
+
+      wire [WIDTH-1:0] int_tdata;
+      wire int_tlast, int_tvalid, int_tready;
+      wire i_tlast_int, i_terror;
+
+      reg small_pkt, large_pkt;
+      wire hold_last_sample;
+      reg release_last;
+      reg [$clog2(MAX_PKT_SIZE+1)-1:0] in_cnt;
+      reg [15:0] in_pkt_cnt, in_pkt_cnt_hold, out_pkt_cnt;
+      always @(posedge clk) begin
+        if (reset | clear) begin
+          small_pkt       <= 1'b0;
+          large_pkt       <= 1'b0;
+          release_last    <= 1'b0;
+          in_cnt          <= 1;
+          in_pkt_cnt      <= 0;
+          in_pkt_cnt_hold <= 0;
+          out_pkt_cnt     <= 0;
+        end else begin
+          if (i_tvalid & i_tready) begin
+            if ((in_cnt == pkt_size) | i_tlast_int) begin
+              in_cnt <= 1;
+            end else begin
+              in_cnt <= in_cnt + 1;
+            end
+          end
+          if (pkt_size == 1) begin
+            small_pkt <= 1'b0;
+            large_pkt <= 1'b0;
+          end else begin
+            if (i_tvalid & i_tready) begin
+              if ((in_cnt == pkt_size-1'b1) & ~i_tlast) begin
+                small_pkt <= 1'b0;
+              end else begin
+                small_pkt <= 1'b1;
+              end
+              if ((in_cnt == pkt_size) & ~i_tlast) begin
+                large_pkt <= 1'b1;
+              end
+              if (large_pkt) begin
+                large_pkt <= 1'b0;
+              end
+            end
+          end
+          if (i_tvalid & i_tready & i_tlast & ~i_terror) begin
+            in_pkt_cnt <= in_pkt_cnt + 1'b1;
+          end
+          if (int_tvalid & int_tready & int_tlast & ~hold_last_sample) begin
+            out_pkt_cnt <= out_pkt_cnt + 1'b1;
+          end
+          if ((i_tvalid & i_tready & i_terror) | flush) begin
+            release_last    <= 1'b1;
+            in_pkt_cnt_hold <= in_pkt_cnt;
+          end else if (in_pkt_cnt_hold == out_pkt_cnt) begin
+            release_last <= 1'b0;
+          end
+        end
+      end
+
+      // Holding the last word only applies when HOLD_LAST_WORD is set; without
+      // the output register below the hold would stall the packet gate while
+      // o_tvalid stays asserted, so the held word would be handed out repeatedly.
+      // The +1 is sized to 16 bits so the comparison stays correct when the
+      // packet counters wrap.
+      assign hold_last_sample = (HOLD_LAST_WORD != 0) & (pkt_size != 1) &
+        ((in_pkt_cnt == out_pkt_cnt) |
+         ((in_pkt_cnt == (out_pkt_cnt + 16'd1)) & ~release_last));
+
+      assign i_tlast_int = i_tlast | large_pkt;
+      assign i_terror    = i_tlast & i_tvalid & (small_pkt | large_pkt);
+
+      // FIFO with ability to rewind write pointer back if input packet is flagged as bad
+      axi_packet_gate #(.WIDTH(WIDTH+1), .SIZE($clog2(MAX_PKT_SIZE+1)), .USE_AS_BUFF(1)) pkt_gate_i (
+        .clk(clk), .reset(reset), .clear(clear),
+        .i_tdata({i_tlast,i_tdata}), .i_tvalid(i_tvalid), .i_tlast(i_tlast_int), .i_terror(i_terror), .i_tready(i_tready),
+        .o_tdata({int_tlast,int_tdata}), .o_tvalid(int_tvalid), .o_tlast(), .o_tready(int_tready & ~(hold_last_sample & int_tlast)));
+
+      // Generate output register to hold on to last word
+      if (HOLD_LAST_WORD) begin
+        axi_fifo_flop2 #(.WIDTH(WIDTH+1)) axi_fifo_flop2 (
+          .clk(clk), .reset(reset), .clear(clear),
+          .i_tdata({int_tlast,int_tdata}), .i_tvalid(int_tvalid & ~(hold_last_sample & int_tlast)), .i_tready(int_tready),
+          .o_tdata({o_tlast,o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+          .space(), .occupied());
+      end else begin
+        assign o_tdata    = int_tdata;
+        assign o_tlast    = int_tlast;
+        assign o_tvalid   = int_tvalid;
+        assign int_tready = o_tready;
+      end
+    end
+  endgenerate
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/axi_fir_filter.v b/fpga/usrp3/lib/rfnoc/axi_fir_filter.v
new file mode 100644
index 000000000..4fffab8af
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/axi_fir_filter.v
@@ -0,0 +1,307 @@
+//
+// Copyright 2017 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Parameterized FIR filter with AXI stream interface.
+// Has several optimizations to resource utilization such as
+// using half the number of DSP slices for symmetric coefficients,
+// skipping coefficients that are always set to zero, and using
+// internal DSP slice registers to hold coefficients.
+//
+// For the most efficient DSP slice inference use these settings:
+// - IN_WIDTH < 25, COEFF_WIDTH < 18, ACCUM_WIDTH < 48
+//
+// Parameters:
+// IN_WIDTH - Input width
+// COEFF_WIDTH - Coefficient width
+// OUT_WIDTH - Output width
+// NUM_COEFFS - Number of coefficients / taps
+// CLIP_BITS - If IN_WIDTH != OUT_WIDTH, number of MSBs to drop
+// ACCUM_WIDTH - Accumulator width
+// COEFFS_VEC - Vector of NUM_COEFFS values each of width COEFF_WIDTH to
+// initialize coeffs. Defaults to an impulse.
+// RELOADABLE_COEFFS - Enable (1) or disable (0) reloading coefficients at runtime (via reload bus) +// BLANK_OUTPUT - Disable (1) or enable (0) output when filling internal pipeline +// SYMMETRIC_COEFFS - Reduce multiplier usage by approx half if coefficients are symmetric +// SKIP_ZERO_COEFFS - Reduce multiplier usage by assuming zero valued coefficients in +// DEFAULT_COEFFS are always zero. Useful for halfband filters. +// USE_EMBEDDED_REGS_COEFFS - Reduce register usage by only using embedded registers in DSP slices. +// Updating taps while streaming will cause temporary output corruption! +// +// Notes: +// - If using USE_EMBEDDED_REGS_COEFFS, coefficients must be written at least once as COEFFS_VEC is ignored! +// - If using SYMMETRIC_COEFFS, only send half the coeffients! i.e. NUM_COEFFS = 11, send the first 6. +// +module axi_fir_filter #( + parameter IN_WIDTH = 16, + parameter COEFF_WIDTH = 16, + parameter OUT_WIDTH = 16, + parameter NUM_COEFFS = 41, + parameter CLIP_BITS = $clog2(NUM_COEFFS), + parameter ACCUM_WIDTH = IN_WIDTH+COEFF_WIDTH+$clog2(NUM_COEFFS)-1, + parameter [NUM_COEFFS*COEFF_WIDTH-1:0] COEFFS_VEC = + {{1'b0,{(COEFF_WIDTH-1){1'b1}}},{(COEFF_WIDTH*(NUM_COEFFS-1)){1'b0}}}, + parameter RELOADABLE_COEFFS = 1, + parameter BLANK_OUTPUT = 1, + // Optimizations + parameter SYMMETRIC_COEFFS = 1, + parameter SKIP_ZERO_COEFFS = 0, + parameter USE_EMBEDDED_REGS_COEFFS = 1 +)( + input clk, + input reset, + input clear, + input [IN_WIDTH-1:0] s_axis_data_tdata, + input s_axis_data_tlast, + input s_axis_data_tvalid, + output s_axis_data_tready, + output [OUT_WIDTH-1:0] m_axis_data_tdata, + output m_axis_data_tlast, + output m_axis_data_tvalid, + input m_axis_data_tready, + input [COEFF_WIDTH-1:0] s_axis_reload_tdata, + input s_axis_reload_tvalid, + input s_axis_reload_tlast, + output s_axis_reload_tready +); + + localparam NUM_SLICES = SYMMETRIC_COEFFS ? 
+ NUM_COEFFS/2 + NUM_COEFFS[0] : // Manual round up, Vivado complains when using $ceil() + NUM_COEFFS; + localparam ODD_LEN = NUM_COEFFS[0]; + localparam PIPELINE_DELAY = NUM_SLICES+4; // +4 pipeline depth in fir_filter_slice.v + + wire [ACCUM_WIDTH-1:0] m_axis_data_tdata_int; + wire m_axis_data_tvalid_int, m_axis_data_tready_int, m_axis_data_tlast_int; + + /////////////////////////////////////////////////////// + // + // Coefficient loading / reloading + // + /////////////////////////////////////////////////////// + reg [COEFF_WIDTH-1:0] coeffs[0:NUM_SLICES-1]; + reg coeff_load_stb = 1'b1; + generate + integer k; + if (RELOADABLE_COEFFS) begin + // Use DSP slice registers to hold coefficients. While loading + // coefficients, input sample data should be throttled if corrupted + // output samples are unacceptable. + if (USE_EMBEDDED_REGS_COEFFS) begin + always @(*) begin + coeff_load_stb <= s_axis_reload_tvalid & s_axis_reload_tready; + end + // Use shift register to hold coefficients. Coefficients are loaded + // into fir filter slice on tlast. 
+ end else begin + always @(posedge clk) begin + if (reset | clear) begin + for (k = 0; k < NUM_SLICES; k = k + 1) begin + coeffs[k] <= COEFFS_VEC[COEFF_WIDTH*k +: COEFF_WIDTH]; + end + // Initialize coefficients at reset + coeff_load_stb <= 1'b1; + end else begin + if (s_axis_reload_tvalid & s_axis_reload_tready) begin + for (k = NUM_SLICES-1; k > 0; k = k - 1) begin + coeffs[k-1] <= coeffs[k]; + end + coeffs[NUM_SLICES-1] <= s_axis_reload_tdata; + end + coeff_load_stb <= s_axis_reload_tvalid & s_axis_reload_tready & s_axis_reload_tlast; + end + end + end + // Coefficients are static + end else begin + always @(*) begin + for (k = 0; k < NUM_SLICES; k = k + 1) begin + coeffs[k] <= COEFFS_VEC[COEFF_WIDTH*k +: COEFF_WIDTH]; + coeff_load_stb <= 1'b1; + end + end + end + endgenerate + + assign s_axis_reload_tready = 1'b1; + + /////////////////////////////////////////////////////// + // + // Systolic FIR Filter + // + /////////////////////////////////////////////////////// + // + // Block Diagram + // - Configuration: SYMMETRIC_COEFFS = 1 and USE_EMBEDDED_REGS_COEFFS = 1 + // + // +-------+ + // Sample In | Shift | Sample In delayed NUM_COEFF + // +-------->| Reg |-------------------------------------------------------------> + // | +-------+ | | + // | v v + // | +-----+ +-----+ + // | | | | | + // | +-----+ +-----+ + // | | | + // | +--+ +--+ | Sample +--+ +--+ | + // | | | | | | Forward | | | | | + // '-->| |-->| |-----------^---------->| |-->| |----------^--------------> + // | | | | | | | | | | | | + // +--+ +--+ v v +--+ +--+ v v + // +------------+ +------------+ + // | Pre-Adder | | Pre-Adder | + // +------------+ +------------+ + // | | + // v v + // +-----+ +-----+ + // *----------------------* | | | | + // | Note: Coeffs are | +-----+ +-----+ + // | loaded backwards | | | + // | for proper alignment | | .----------------------^----------------< + // *----------------------* | | | + // +--+ +--+ v | +--+ +--+ v + // Coeff In | | | | +------------+ | | | | | 
+------------+ + // .--->| |-->| |-->| Multiplier | '-->| |-->| |->| Multiplier | + // | | | | | +------------+ | | | | | +------------+ + // | +--+ +--+ | +--+ | +--+ | + // | | | | + // '------------------------^------------------' | + // | Coeff | + // v Forward v + // +-----+ +-----+ + // | | | | + // +-----+ +-----+ + // | | + // v +--+ Sample v +--+ + // +------------+ | | Out +------------+ | | + // | Adder |-->| |----------->| Adder |-->| |--> + // +------------+ | | +------------+ | | + // +--+ +--+ + // + /////////////////////////////////////////////////////// + genvar i, l; + generate + // Counter to track pipeline fullness + reg [$clog2(PIPELINE_DELAY):0] cnt; + always @(posedge clk) begin + if (reset | clear) begin + cnt <= 0; + end else if (s_axis_data_tvalid & s_axis_data_tready) begin + if (cnt < PIPELINE_DELAY) begin + cnt <= cnt + 1; + end + end + end + + // Sample delay shift register for efficient implementation + // when using symmetric coefficients + reg [IN_WIDTH-1:0] sample_shift_reg[0:NUM_COEFFS-1]; + integer n; + initial begin + for (n = 0; n < NUM_COEFFS; n = n + 1) begin + sample_shift_reg[n] <= 0; + end + end + always @(posedge clk) begin + if (s_axis_data_tvalid & s_axis_data_tready) begin + for (n = 1; n < NUM_COEFFS; n = n + 1) begin + sample_shift_reg[n] <= sample_shift_reg[n-1]; + end + sample_shift_reg[0] <= s_axis_data_tdata; + end + end + + // tlast shift register + reg [PIPELINE_DELAY-1:0] tlast_shift_reg = 0; + integer m; + always @(posedge clk) begin + if (s_axis_data_tvalid & s_axis_data_tready) begin + for (m = 1; m < PIPELINE_DELAY; m = m + 1) begin + tlast_shift_reg[m] <= tlast_shift_reg[m-1]; + end + tlast_shift_reg[0] <= s_axis_data_tlast; + end + end + + wire [IN_WIDTH-1:0] sample_in[0:NUM_SLICES]; // Use [0:NUM_SLICES] instead of + wire [ACCUM_WIDTH-1:0] sample_accum[0:NUM_SLICES]; // [0:NUM_SLICES-1] to make the + wire [COEFF_WIDTH-1:0] coeff_forward[0:NUM_SLICES]; // generate loop easier to read + assign 
sample_in[0] = s_axis_data_tdata; + assign sample_accum[0] = 0; + assign coeff_forward[NUM_SLICES] = s_axis_reload_tdata; + + // Build up FIR filter with multiply-accumulate slices (fir_filter_slice) + for (i = 0; i < NUM_SLICES; i = i + 1) begin + // Map zero'd out coefficients to simple register delays. + if ((SKIP_ZERO_COEFFS == 1) && (COEFFS_VEC[COEFF_WIDTH*i +: COEFF_WIDTH] == 0)) begin + reg [ACCUM_WIDTH-1:0] sample_accum_reg; + reg [IN_WIDTH-1:0] sample_in_reg[0:1]; + reg [COEFF_WIDTH-1:0] coeff_in_reg; + always @(posedge clk) begin + if (reset | clear) begin + sample_in_reg[0] <= 0; + sample_in_reg[1] <= 0; + sample_accum_reg <= 0; + coeff_in_reg <= 0; + end else begin + if (s_axis_data_tvalid & s_axis_data_tready) begin + sample_in_reg[0] <= sample_in[i]; + sample_in_reg[1] <= sample_in_reg[0]; + sample_accum_reg <= sample_accum[i]; + end + if (coeff_load_stb) begin + coeff_in_reg <= coeff_forward[i+1]; + end + end + end + assign sample_in[i+1] = sample_in_reg[1]; + assign sample_accum[i+1] = sample_accum_reg; + assign coeff_forward[i] = coeff_in_reg; + end else begin + fir_filter_slice #( + .IN_WIDTH(IN_WIDTH), + .COEFF_WIDTH(COEFF_WIDTH), + .ACCUM_WIDTH(ACCUM_WIDTH), + .OUT_WIDTH(ACCUM_WIDTH)) + fir_filter_slice ( + .clk(clk), + .reset(reset), + .clear(clear), + .sample_in_stb(s_axis_data_tvalid & s_axis_data_tready), + .sample_in_a(sample_in[i]), + // sample_in_b is used to implement symmetric coefficients, always 0 if SYMMETRIC_COEFFS = 0 + .sample_in_b(((SYMMETRIC_COEFFS == 0) || ((ODD_LEN == 1) && (i == NUM_SLICES-1))) ? {IN_WIDTH{1'b0}} : sample_shift_reg[NUM_COEFFS-1]), + .sample_forward(sample_in[i+1]), + // For proper coeffient loading, coeff_forward must be shifted in backwards. coeffs[] is already backwards. + .coeff_in(((USE_EMBEDDED_REGS_COEFFS == 1) && (RELOADABLE_COEFFS == 1)) ? 
coeff_forward[i+1] : coeffs[i]), + .coeff_forward(coeff_forward[i]), + .coeff_load_stb(coeff_load_stb), + .sample_accum(sample_accum[i]), + .sample_out(sample_accum[i+1])); + end + end + assign m_axis_data_tdata_int = (BLANK_OUTPUT == 1) & (cnt < PIPELINE_DELAY) ? 0 : sample_accum[NUM_SLICES]; + assign m_axis_data_tvalid_int = (BLANK_OUTPUT == 1) & (cnt < PIPELINE_DELAY) ? 1'b0 : s_axis_data_tvalid; + assign m_axis_data_tlast_int = (BLANK_OUTPUT == 1) ? ((cnt < PIPELINE_DELAY) ? 1'b0 : tlast_shift_reg[PIPELINE_DELAY-1]) : s_axis_data_tlast; + assign s_axis_data_tready = m_axis_data_tready_int; + endgenerate + + axi_round_and_clip #( + .WIDTH_IN(ACCUM_WIDTH), + .WIDTH_OUT(OUT_WIDTH), + .CLIP_BITS(CLIP_BITS)) + inst_axi_round_and_clip ( + .clk(clk), + .reset(reset | clear), + .i_tdata(m_axis_data_tdata_int), + .i_tlast(m_axis_data_tlast_int), + .i_tvalid(m_axis_data_tvalid_int), + .i_tready(m_axis_data_tready_int), + .o_tdata(m_axis_data_tdata), + .o_tlast(m_axis_data_tlast), + .o_tvalid(m_axis_data_tvalid), + .o_tready(m_axis_data_tready)); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/axi_fir_filter_dec.v b/fpga/usrp3/lib/rfnoc/axi_fir_filter_dec.v new file mode 100644 index 000000000..30be7501e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_fir_filter_dec.v @@ -0,0 +1,83 @@ +// +// Copyright 2018 Ettus Research, a National Instruments Brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +// Generic FIR filter with decimator + +module axi_fir_filter_dec #( + parameter WIDTH = 24, + parameter COEFF_WIDTH = 18, + parameter NUM_COEFFS = 47, + parameter [NUM_COEFFS*COEFF_WIDTH-1:0] COEFFS_VEC = + {{1'b0,{(COEFF_WIDTH-1){1'b1}}},{(COEFF_WIDTH*(NUM_COEFFS-1)){1'b0}}}, + parameter BLANK_OUTPUT = 0 +)( + input clk, + input reset, + + input [2*WIDTH-1:0] i_tdata, + input i_tlast, + input i_tvalid, + output i_tready, + + output [2*WIDTH-1:0] o_tdata, + output o_tlast, + output o_tvalid, + input o_tready +); + +wire [WIDTH-1:0] tdata_fir0_dec0; +wire tvalid_fir0_dec0; 
+wire tlast_fir0_dec0; +wire tready_fir0_dec0; + +wire [WIDTH-1:0] tdata_fir1_dec1; +wire tvalid_fir1_dec1; +wire tlast_fir1_dec1; +wire tready_fir1_dec1; + +wire [WIDTH-1:0] tdata_fir0; +wire [WIDTH-1:0] tdata_fir1; +wire [WIDTH-1:0] tdata_dec0; +wire [WIDTH-1:0] tdata_dec1; + +// Split input data into real and imag. part. +assign tdata_fir0 = i_tdata[2*WIDTH-1:WIDTH]; +assign tdata_fir1 = i_tdata[WIDTH-1:0]; + +// FIR filter for real part +axi_fir_filter #(.IN_WIDTH(WIDTH), .COEFF_WIDTH(COEFF_WIDTH), .OUT_WIDTH(WIDTH), .NUM_COEFFS(NUM_COEFFS), .COEFFS_VEC(COEFFS_VEC), + .RELOADABLE_COEFFS(0), .BLANK_OUTPUT(0), .SYMMETRIC_COEFFS(1), .SKIP_ZERO_COEFFS(1), .USE_EMBEDDED_REGS_COEFFS(0) +) hbfir0( + .clk(clk), .reset(reset), .clear(reset), + .s_axis_data_tdata(tdata_fir0), .s_axis_data_tlast(i_tlast), .s_axis_data_tvalid(i_tvalid), .s_axis_data_tready(i_tready), + .m_axis_data_tdata(tdata_fir0_dec0), .m_axis_data_tlast(tlast_fir0_dec0), .m_axis_data_tvalid(tvalid_fir0_dec0), .m_axis_data_tready(tready_fir0_dec0), + .s_axis_reload_tdata(18'd0), .s_axis_reload_tvalid(1'b0), .s_axis_reload_tlast(1'b0), .s_axis_reload_tready()); + +// FIR filter for imag. 
part +axi_fir_filter #(.IN_WIDTH(WIDTH), .COEFF_WIDTH(COEFF_WIDTH), .OUT_WIDTH(WIDTH), .NUM_COEFFS(NUM_COEFFS), .COEFFS_VEC(COEFFS_VEC), + .RELOADABLE_COEFFS(0), .BLANK_OUTPUT(0), .SYMMETRIC_COEFFS(1), .SKIP_ZERO_COEFFS(1), .USE_EMBEDDED_REGS_COEFFS(0) +) hbfir1( + .clk(clk), .reset(reset), .clear(reset), + .s_axis_data_tdata(tdata_fir1), .s_axis_data_tlast(i_tlast), .s_axis_data_tvalid(i_tvalid), .s_axis_data_tready(), + .m_axis_data_tdata(tdata_fir1_dec1), .m_axis_data_tlast(tlast_fir1_dec1), .m_axis_data_tvalid(tvalid_fir1_dec1), .m_axis_data_tready(tready_fir1_dec1), + .s_axis_reload_tdata(18'd0), .s_axis_reload_tvalid(1'b0), .s_axis_reload_tlast(1'b0), .s_axis_reload_tready()); + +// Decimator for real part +keep_one_in_n #(.KEEP_FIRST(1), .WIDTH(WIDTH), .MAX_N(4) +) dec0 ( + .clk(clk), .reset(reset), .vector_mode(1'b0), .n(2), + .i_tdata(tdata_fir0_dec0), .i_tlast(tlast_fir0_dec0), .i_tvalid(tvalid_fir0_dec0), .i_tready(tready_fir0_dec0), + .o_tdata(tdata_dec0), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready)); + +// Decimator for imag. 
part +keep_one_in_n #(.KEEP_FIRST(1), .WIDTH(WIDTH), .MAX_N(4) +) dec1 ( + .clk(clk), .reset(reset), .vector_mode(1'b0), .n(2), + .i_tdata(tdata_fir1_dec1), .i_tlast(tlast_fir1_dec1), .i_tvalid(tvalid_fir1_dec1), .i_tready(tready_fir1_dec1), + .o_tdata(tdata_dec1), .o_tlast(), .o_tvalid(), .o_tready(o_tready)); + +assign o_tdata = {tdata_dec0, tdata_dec1}; + +endmodule // axi_fir_filter_dec diff --git a/fpga/usrp3/lib/rfnoc/axi_join.v b/fpga/usrp3/lib/rfnoc/axi_join.v new file mode 100644 index 000000000..a2f7c567d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_join.v @@ -0,0 +1,18 @@ + + +// Copyright 2014 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +module axi_join + #(parameter INPUTS=2) + (input [INPUTS-1:0] i_tlast, input [INPUTS-1:0] i_tvalid, output [INPUTS-1:0] i_tready, + output o_tlast, output o_tvalid, input o_tready); + + wire all_here = &i_tvalid; + assign o_tvalid = all_here; + assign o_tlast = |i_tlast; + assign i_tready = {INPUTS{o_tready & all_here}}; + +endmodule // axi_join diff --git a/fpga/usrp3/lib/rfnoc/axi_packer.v b/fpga/usrp3/lib/rfnoc/axi_packer.v new file mode 100644 index 000000000..fbed250f6 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_packer.v @@ -0,0 +1,67 @@ +// +// Copyright 2015 Ettus Research +// +// Increases AXI stream bit width by concatenating inputs across multiple clock cycles. 
// Note: WIDTH_OUT must be an integer multiple of WIDTH_IN, and at least
//       2*WIDTH_IN (M = WIDTH_OUT/WIDTH_IN input words form one output word).
//
// Behavior:
// - Every M accepted input words produce one output word.
// - REVERSE = 0: each new input word enters at the least significant slot and
//   previously received words shift towards the MSBs.
//   REVERSE = 1: each new input word enters at the most significant slot and
//   previously received words shift towards the LSBs.
// - o_tlast is asserted on an output word if any of the M input words that
//   formed it had i_tlast asserted.

module axi_packer #(
  parameter WIDTH_IN  = 8,   // Input bit width
  parameter WIDTH_OUT = 32,  // Output bit width
  parameter REVERSE   = 0)   // 0: Fill LSB to MSB, 1: Fill MSB to LSB
(
  input clk, input reset, input clear,
  input [WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
  output [WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
);

  // Number of input words per output word
  localparam M = WIDTH_OUT/WIDTH_IN;

  reg packed_tlast, packed_tvalid;
  reg [WIDTH_OUT-1:0] packed_tdata;
  reg [$clog2(M)-1:0] cnt;   // Counts 0..M-1 accepted input words
  reg i_tlast_hold;          // Remembers a tlast seen earlier in the current output word
  integer i;
  always @(posedge clk) begin
    if (reset | clear) begin
      i_tlast_hold  <= 1'b0;
      cnt           <= 'd0;
      packed_tdata  <= 'd0;
      packed_tvalid <= 1'b0;
      packed_tlast  <= 1'b0;
    end else begin
      // Output strobes are single cycle pulses
      packed_tvalid <= 1'b0;
      packed_tlast  <= 1'b0;
      if (i_tvalid & i_tready) begin
        // cnt only ranges over 0..M-1, so the final word of a group is
        // detected with an equality compare (a "greater than M-1" compare
        // can never be true for a $clog2(M) bit counter).
        if (cnt == M-1) begin
          cnt           <= 'd0;
          // Include tlast of the word being accepted right now and release
          // the hold so following output words are not flagged as well.
          packed_tlast  <= i_tlast_hold | i_tlast;
          packed_tvalid <= 1'b1;
          i_tlast_hold  <= 1'b0;
        end else begin
          cnt           <= cnt + 1;
          if (i_tlast) begin
            i_tlast_hold <= 1'b1;
          end
        end
        if (REVERSE) begin
          packed_tdata[WIDTH_OUT-1:WIDTH_OUT-WIDTH_IN] <= i_tdata;
          for (i = 0; i < M-1; i = i + 1) begin
            packed_tdata[WIDTH_OUT-(i+1)*WIDTH_IN-1 -: WIDTH_IN] <= packed_tdata[WIDTH_OUT-i*WIDTH_IN-1 -: WIDTH_IN];
          end
        end else begin
          packed_tdata[WIDTH_IN-1:0] <= i_tdata;
          for (i = 0; i < M-1; i = i + 1) begin
            packed_tdata[(i+2)*WIDTH_IN-1 -: WIDTH_IN] <= packed_tdata[(i+1)*WIDTH_IN-1 -: WIDTH_IN];
          end
        end
      end
    end
  end

  // Output register. Its input-side ready also throttles the packer input.
  axi_fifo_flop #(.WIDTH(WIDTH_OUT+1)) axi_fifo_flop_pack (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata({packed_tlast,packed_tdata}), .i_tvalid(packed_tvalid), .i_tready(i_tready),
    .o_tdata({o_tlast,o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
    .space(), .occupied());

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/axi_packet_mux.v b/fpga/usrp3/lib/rfnoc/axi_packet_mux.v new file mode 100644 index 000000000..53ce890ba --- /dev/null +++
b/fpga/usrp3/lib/rfnoc/axi_packet_mux.v @@ -0,0 +1,41 @@
//
// Copyright 2016 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Muxes and packetizes input AXI-streams. Assumes header on tuser.
//
// Each input's 128-bit tuser and 64-bit tdata are concatenated into one
// 192-bit word ({tuser, tdata}), muxed by axi_mux, split again and handed to
// chdr_framer, which produces the 64-bit output stream.
//
// The clear input is forwarded to both axi_mux and chdr_framer.

module axi_packet_mux #(
  parameter NUM_INPUTS = 1,
  parameter MUX_PRE_FIFO_SIZE = 0,  // Use 0 (most efficient) unless there is need to compensate for unbalanced input path latencies
  parameter MUX_POST_FIFO_SIZE = 0, // Generally leave at 0, similar effect as FIFO_SIZE
  parameter FIFO_SIZE = 5           // Size of FIFO in CHDR framer
)(
  input clk, input reset, input clear,
  input [NUM_INPUTS*64-1:0] i_tdata, input [NUM_INPUTS-1:0] i_tlast, input [NUM_INPUTS-1:0] i_tvalid, output [NUM_INPUTS-1:0] i_tready, input [NUM_INPUTS*128-1:0] i_tuser,
  output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
);

  // Per input: {tuser (128 bits), tdata (64 bits)}
  wire [NUM_INPUTS*(64+128)-1:0] i_tdata_flat;
  genvar i;
  generate
    for (i = 0; i < NUM_INPUTS; i = i + 1) begin
      assign i_tdata_flat[(128+64)*(i+1)-1:(128+64)*i] = {i_tuser[128*(i+1)-1:128*i],i_tdata[64*(i+1)-1:64*i]};
    end
  endgenerate

  wire [63:0] int_tdata;
  wire [127:0] int_tuser;
  wire int_tlast, int_tvalid, int_tready;
  axi_mux #(.PRIO(0), .WIDTH(128+64), .PRE_FIFO_SIZE(MUX_PRE_FIFO_SIZE), .POST_FIFO_SIZE(MUX_POST_FIFO_SIZE), .SIZE(NUM_INPUTS)) axi_mux (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata(i_tdata_flat), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready),
    .o_tdata({int_tuser, int_tdata}), .o_tlast(int_tlast), .o_tvalid(int_tvalid), .o_tready(int_tready));

  chdr_framer #(.SIZE(FIFO_SIZE), .WIDTH(64)) chdr_framer (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata(int_tdata), .i_tuser(int_tuser), .i_tlast(int_tlast), .i_tvalid(int_tvalid), .i_tready(int_tready),
    .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready));

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/axi_pipe.v
b/fpga/usrp3/lib/rfnoc/axi_pipe.v new file mode 100644 index 000000000..4c635a5bf --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_pipe.v @@ -0,0 +1,67 @@

// Copyright 2014 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later


// Handshake-only pipeline of STAGES registers (no data path).
// - valids[n]  : stage n holds a valid item
// - enables[n] : stage n may load this cycle, i.e. the output is ready or some
//                stage from n onwards is empty
// - tlast travels through the stages alongside the valid flags
module axi_pipe
  #(parameter STAGES=3)
   (input clk, input reset, input clear,
    input i_tlast, input i_tvalid, output i_tready,
    output o_tlast, output o_tvalid, input o_tready,
    output [STAGES-1:0] enables,
    output reg [STAGES-1:0] valids);

   reg [STAGES-1:0] tlast;

   // Chains seen by each stage input: bit 0 is the module input, bit n (n > 0)
   // is the registered output of stage n-1. Bit STAGES is the module output.
   wire [STAGES:0] valid_chain = {valids, i_tvalid};
   wire [STAGES:0] tlast_chain = {tlast, i_tlast};

   assign i_tready = enables[0];
   assign o_tvalid = valid_chain[STAGES];
   assign o_tlast  = tlast_chain[STAGES];

   // //////////////////////////////
   // Enables
   genvar st;
   generate
      for(st=0; st<STAGES; st=st+1)
        assign enables[st] = o_tready | ~(&valids[STAGES-1:st]);
   endgenerate

   // //////////////////////////////
   // Valids: take the upstream valid, or keep the current one while stalled
   always @(posedge clk)
     if(reset | clear)
       valids <= {STAGES{1'b0}};
     else
       valids <= valid_chain[STAGES-1:0] | (valids & ~enables);

   // /////////////////////////////
   // tlast: advances only when the stage is enabled
   integer n;
   always @(posedge clk)
     for(n=0; n<STAGES; n=n+1)
       if(reset | clear)
         tlast[n] <= 1'b0;
       else if(enables[n])
         tlast[n] <= tlast_chain[n];

endmodule // axi_pipe
diff --git a/fpga/usrp3/lib/rfnoc/axi_pipe_join.v b/fpga/usrp3/lib/rfnoc/axi_pipe_join.v new file mode 100644 index 000000000..635a7bf75 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_pipe_join.v @@ -0,0 +1,46 @@

// Copyright 2014 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later


// Two handshake pipelines (axi_pipe) whose outputs are combined by axi_join
// and then fed through a third handshake pipeline. The per-stage enables of
// all three pipelines are exported.
module axi_pipe_join
  #(parameter PRE_JOIN_STAGES0=3,
    parameter PRE_JOIN_STAGES1=3,
    parameter POST_JOIN_STAGES=3)
   (input clk, input reset, input clear,
    input i0_tlast, input i0_tvalid, output i0_tready,
    input i1_tlast, input i1_tvalid, output i1_tready,
    output o_tlast, output o_tvalid, input o_tready,
    output [PRE_JOIN_STAGES0-1:0] enables0,
    output [PRE_JOIN_STAGES1-1:0] enables1,
    output [POST_JOIN_STAGES-1:0] enables_post);

   // Outputs of the two pre-join pipelines
   wire pre0_tlast, pre0_tvalid, pre0_tready;
   wire pre1_tlast, pre1_tvalid, pre1_tready;
   // Joined stream feeding the post-join pipeline
   wire joined_tlast, joined_tvalid, joined_tready;

   axi_pipe #(.STAGES(PRE_JOIN_STAGES0)) pipe_pre_0
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(i0_tlast),
      .i_tvalid(i0_tvalid),
      .i_tready(i0_tready),
      .o_tlast(pre0_tlast),
      .o_tvalid(pre0_tvalid),
      .o_tready(pre0_tready),
      .enables(enables0),
      .valids());

   axi_pipe #(.STAGES(PRE_JOIN_STAGES1)) pipe_pre_1
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(i1_tlast),
      .i_tvalid(i1_tvalid),
      .i_tready(i1_tready),
      .o_tlast(pre1_tlast),
      .o_tvalid(pre1_tvalid),
      .o_tready(pre1_tready),
      .enables(enables1),
      .valids());

   // Input 0 of the join is pipe_pre_0, input 1 is pipe_pre_1
   axi_join #(.INPUTS(2)) joiner
     (.i_tlast({pre1_tlast,pre0_tlast}),
      .i_tvalid({pre1_tvalid,pre0_tvalid}),
      .i_tready({pre1_tready,pre0_tready}),
      .o_tlast(joined_tlast),
      .o_tvalid(joined_tvalid),
      .o_tready(joined_tready));

   axi_pipe #(.STAGES(POST_JOIN_STAGES)) pipe_post
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(joined_tlast),
      .i_tvalid(joined_tvalid),
      .i_tready(joined_tready),
      .o_tlast(o_tlast),
      .o_tvalid(o_tvalid),
      .o_tready(o_tready),
      .enables(enables_post),
      .valids());

endmodule // axi_pipe_join
diff --git a/fpga/usrp3/lib/rfnoc/axi_pipe_mac.v b/fpga/usrp3/lib/rfnoc/axi_pipe_mac.v new file mode 100644 index 000000000..de4ade51f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_pipe_mac.v @@ -0,0 +1,79 @@

// Copyright 2014 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later

// Latency must
// be 3 or 4

// Handshake pipeline for a multiply-accumulate: inputs a and b pass through
// LATENCY-2 stages each, are joined and pass one "m" stage; the result is
// joined with input c (one stage, or none when CASCADE_IN is set) and passes
// one final "p" stage. Only tvalid / tready / tlast are handled here; the
// per-stage enables are exported.
module axi_pipe_mac
  #(parameter LATENCY=3,
    parameter CASCADE_IN=0)
   (input clk, input reset, input clear,
    input a_tlast, input a_tvalid, output a_tready,
    input b_tlast, input b_tvalid, output b_tready,
    input c_tlast, input c_tvalid, output c_tready,
    output p_tlast, output p_tvalid, input p_tready,
    output [LATENCY-3:0] enables_a,
    output [LATENCY-3:0] enables_b,
    output enable_c,
    output enable_m,
    output enable_p);

   // Outputs of the a / b input pipelines
   wire a_pipe_tlast, a_pipe_tvalid, a_pipe_tready;
   wire b_pipe_tlast, b_pipe_tvalid, b_pipe_tready;
   // a and b joined (input of the m stage)
   wire ab_tlast, ab_tvalid, ab_tready;
   // Output of the m stage
   wire m_tlast, m_tvalid, m_tready;
   // c as presented to the second join
   wire c_stage_tlast, c_stage_tvalid, c_stage_tready;
   // m and c joined (input of the p stage)
   wire mc_tlast, mc_tvalid, mc_tready;

   axi_pipe #(.STAGES(LATENCY-2)) pipe_a
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(a_tlast),
      .i_tvalid(a_tvalid),
      .i_tready(a_tready),
      .o_tlast(a_pipe_tlast),
      .o_tvalid(a_pipe_tvalid),
      .o_tready(a_pipe_tready),
      .enables(enables_a),
      .valids());

   axi_pipe #(.STAGES(LATENCY-2)) pipe_b
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(b_tlast),
      .i_tvalid(b_tvalid),
      .i_tready(b_tready),
      .o_tlast(b_pipe_tlast),
      .o_tvalid(b_pipe_tvalid),
      .o_tready(b_pipe_tready),
      .enables(enables_b),
      .valids());

   // Input 0 of the join is pipe_a, input 1 is pipe_b
   axi_join #(.INPUTS(2)) join_ab
     (.i_tlast({b_pipe_tlast,a_pipe_tlast}),
      .i_tvalid({b_pipe_tvalid,a_pipe_tvalid}),
      .i_tready({b_pipe_tready,a_pipe_tready}),
      .o_tlast(ab_tlast),
      .o_tvalid(ab_tvalid),
      .o_tready(ab_tready));

   axi_pipe #(.STAGES(1)) pipe_m
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(ab_tlast),
      .i_tvalid(ab_tvalid),
      .i_tready(ab_tready),
      .o_tlast(m_tlast),
      .o_tvalid(m_tvalid),
      .o_tready(m_tready),
      .enables(enable_m),
      .valids());

   // If we use the cascade input, there is no flop in the input side adder
   generate
      if(CASCADE_IN)
        begin
           // c bypasses its pipeline stage; enable_c is held low
           assign enable_c       = 1'b0;
           assign c_stage_tvalid = c_tvalid;
           assign c_stage_tlast  = c_tlast;
           assign c_tready       = c_stage_tready;
        end
      else
        axi_pipe #(.STAGES(1)) pipe_c
          (.clk(clk),
           .reset(reset),
           .clear(clear),
           .i_tlast(c_tlast),
           .i_tvalid(c_tvalid),
           .i_tready(c_tready),
           .o_tlast(c_stage_tlast),
           .o_tvalid(c_stage_tvalid),
           .o_tready(c_stage_tready),
           .enables(enable_c),
           .valids());
   endgenerate

   // Input 0 of the join is the c path, input 1 is the m stage
   axi_join #(.INPUTS(2)) joiner_mc
     (.i_tlast({m_tlast,c_stage_tlast}),
      .i_tvalid({m_tvalid,c_stage_tvalid}),
      .i_tready({m_tready,c_stage_tready}),
      .o_tlast(mc_tlast),
      .o_tvalid(mc_tvalid),
      .o_tready(mc_tready));

   axi_pipe #(.STAGES(1)) pipe_p
     (.clk(clk),
      .reset(reset),
      .clear(clear),
      .i_tlast(mc_tlast),
      .i_tvalid(mc_tvalid),
      .i_tready(mc_tready),
      .o_tlast(p_tlast),
      .o_tvalid(p_tvalid),
      .o_tready(p_tready),
      .enables(enable_p),
      .valids());

endmodule // axi_pipe_mac
diff --git a/fpga/usrp3/lib/rfnoc/axi_rate_change.v b/fpga/usrp3/lib/rfnoc/axi_rate_change.v new file mode 100644 index 000000000..166e03c46 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_rate_change.v @@ -0,0 +1,491 @@
//
// Copyright 2016 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// - Implements rate change of N:M (a.k.a. M/N), handles headers automatically
// - Note: N should always be written before M in software to prevent false rate changes
//   while the block is active
// - User code is responsible for generating correct number of outputs per input
//   > Example: When set 1/N, after N input samples block should output 1 sample. If
//     user code's pipelining requires additional samples to "push" the 1
//     sample out, it is the user's responsibility to make the mechanism
//     (such as injecting extra samples) to do so.
// - Will always send an integer multiple of N samples to user logic. This ensures
//   the user will not need to manually clear a "partial output sample" stuck in their
//   pipeline due to an uneven (in respect to decimation rate) number of input samples.
+// - Can optionally strobe clear_user after receiving packet with EOB +// > enable_clear_user must be enabled via CONFIG settings register +// > Warning: Input will be throttled until last packet has completely passed through +// user code to prevent clearing valid data. In certain conditions, this throttling +// can have a significant impact on throughput. +// - Output packet size will be identical to input packet size. The only exception is +// the final output packet, which may be shorter due to a partial input packet. +// Limitations: +// - Rate changes are ignored while active. Block must be cleared or packet with EOB +// (and enable_clear_user is set) will cause new rates to be loaded. +// - Can potentially use large amounts of block RAM when using large decimation rates +// (greater than 2K). This occurs due to the feature that the block always sends a multiple +// of N samples to the user. Implementing this feature requires N samples to be buffered. +// - User code with long pipelines may need to increase HEADER_FIFOSIZE. The debug signal +// warning_header_fifo_full is useful in determining this case. +// +// Settings Registers: +// sr_n: Number of input samples per M output samples (Always write N before M) +// sr_m: Number of output samples per N input samples +// sr_config: 0: Enable clear_user signal. + +module axi_rate_change #( + parameter WIDTH = 32, // Input bit width, must be a power of 2 and greater than or equal to 8. + parameter MAX_N = 2**16, + parameter MAX_M = 2**16, + parameter MAXIMIZE_OUTPUT_PKT_LEN = 1, + // Settings registers + parameter SR_N_ADDR = 0, + parameter SR_M_ADDR = 1, + parameter SR_CONFIG_ADDR = 2 +)( + input clk, input reset, input clear, + output clear_user, // Strobed after end of burst. Throttles input. Useful for resetting user code between bursts. 
+ input [15:0] src_sid, input [15:0] dst_sid, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, input [127:0] i_tuser, + output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready, output [127:0] o_tuser, + output [WIDTH-1:0] m_axis_data_tdata, output m_axis_data_tlast, output m_axis_data_tvalid, input m_axis_data_tready, + input [WIDTH-1:0] s_axis_data_tdata, input s_axis_data_tlast, input s_axis_data_tvalid, output s_axis_data_tready, + // Debugging signals: + // - Warnings indicate there may be an issue with user code. + // - Errors mean the user code has violated a rule. + // - Signals latch once set and block must be reset to clear. + output reg warning_long_throttle, // In the throttle state for a "long" time. + output reg error_extra_outputs, // User code generated extra outputs, i.e. received more than the expected M outputs. + output reg error_drop_pkt_lockup // Drop partial packet module is not accepting data even though user code is ready. 
+); + + reg [$clog2(MAX_N+1)-1:0] n = 1; + reg [$clog2(MAX_M+1)-1:0] m = 1; + + wire [WIDTH-1:0] i_reg_tdata; + wire i_reg_tvalid, i_reg_tready, i_reg_tlast; + wire i_reg_tvalid_int, i_reg_tready_int, i_reg_tlast_int; + + reg throttle = 1'b1, first_header, partial_first_word; + reg [15:0] word_cnt_div_n; + reg [$clog2(MAX_N+1)-1:0] word_cnt_div_n_frac = 1; + reg [$clog2(MAX_N+1)-1:0] in_pkt_cnt = 1; + + reg send_done; + reg rate_changed; + + /******************************************************** + ** Settings Registers + ********************************************************/ + wire [$clog2(MAX_N+1)-1:0] sr_n; + wire n_changed; + setting_reg #(.my_addr(SR_N_ADDR), .width($clog2(MAX_N+1)), .at_reset(1)) set_n ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(sr_n), .changed(n_changed)); + + wire [$clog2(MAX_M+1)-1:0] sr_m; + wire m_changed; + setting_reg #(.my_addr(SR_M_ADDR), .width($clog2(MAX_M+1)), .at_reset(1)) set_m ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(sr_m), .changed(m_changed)); + + wire sr_config; + wire enable_clear_user; // Enable strobing clear_user between bursts. + setting_reg #(.my_addr(SR_CONFIG_ADDR), .width(1), .at_reset(1'b1)) set_config ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(sr_config), .changed()); + assign enable_clear_user = sr_config; + + /******************************************************** + ** Header, word count FIFOs + ** - Header provides VITA Time and payload length for + ** output packets + ** - Word count provides a normalized count for the + ** output state machine to know when it has consumed + ** the final input sample in a burst. 
+ ********************************************************/ + // Decode input header + wire [127:0] i_reg_tuser; + wire has_time_in, eob_in, eob_in_header; + wire [15:0] payload_length_in; + reg [15:0] payload_length_out; + wire [63:0] vita_time_in; + cvita_hdr_decoder cvita_hdr_decoder_in_header ( + .header(i_reg_tuser), .pkt_type(), .eob(eob_in_header), + .has_time(has_time_in), .seqnum(), .length(), .payload_length(payload_length_in), + .src_sid(), .dst_sid(), .vita_time(vita_time_in)); + + assign eob_in = eob_in_header | rate_changed; + + reg [15:0] word_cnt_div_n_tdata; + wire [15:0] word_cnt_div_n_fifo_tdata; + reg word_cnt_div_n_tvalid; + wire word_cnt_div_n_tready, word_cnt_div_n_fifo_tvalid, word_cnt_div_n_fifo_tready; + axi_fifo #(.WIDTH(16), .SIZE(0)) axi_fifo_word_cnt ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata(word_cnt_div_n_tdata), .i_tvalid(word_cnt_div_n_tvalid), .i_tready(word_cnt_div_n_tready), + .o_tdata(word_cnt_div_n_fifo_tdata), .o_tvalid(word_cnt_div_n_fifo_tvalid), .o_tready(word_cnt_div_n_fifo_tready), + .space(), .occupied()); + + /******************************************************** + ** Register input stream + ** - Upsteam will be throttled when clearing user logic + ********************************************************/ + // Input register with header + axi_fifo_flop #(.WIDTH(WIDTH+1+128)) axi_fifo_flop_input ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast,i_tdata,i_tuser}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({i_reg_tlast,i_reg_tdata,i_reg_tuser}), .o_tvalid(i_reg_tvalid_int), .o_tready(i_reg_tready), + .space(), .occupied()); + + assign i_reg_tready = i_reg_tready_int & word_cnt_div_n_tready & ~throttle; + assign i_reg_tvalid = i_reg_tvalid_int & word_cnt_div_n_tready & ~throttle; + // Assert AXI Drop Partial Packet's i_tlast every N samples, which is used to detect and drop + // partial output samples. 
+ assign i_reg_tlast_int = (word_cnt_div_n_frac == n) | (eob_in & i_reg_tlast); + + /******************************************************** + ** Input state machine + ********************************************************/ + reg [1:0] input_state; + localparam RECV_INIT = 0; + localparam RECV = 1; + localparam RECV_WAIT_FOR_SEND_DONE = 2; + + always @(posedge clk) begin + if (reset | clear) begin + n <= 1; + m <= 1; + rate_changed <= 1'b0; + first_header <= 1'b1; + partial_first_word <= 1'b1; + payload_length_out <= 'd0; + word_cnt_div_n <= 0; + word_cnt_div_n_frac <= 1; + throttle <= 1'b1; + word_cnt_div_n_tvalid <= 1'b0; + word_cnt_div_n_tdata <= 'd0; + input_state <= RECV_INIT; + end else begin + if (word_cnt_div_n_tvalid & word_cnt_div_n_tready) begin + word_cnt_div_n_tvalid <= 1'b0; + end + // Input state machine + case (input_state) + RECV_INIT : begin + n <= sr_n; + m <= sr_m; + rate_changed <= 1'b0; + first_header <= 1'b1; + partial_first_word <= 1'b1; + payload_length_out <= 'd0; + word_cnt_div_n <= 0; + word_cnt_div_n_frac <= 1; + if (i_reg_tvalid_int & word_cnt_div_n_tready) begin + throttle <= 1'b0; + input_state <= RECV; + end + end + // Logic used by the RECV state to track several variables: + // word_cnt_div_n: Number of words received divided by n. + // Needed for tracking final sample in a burst. + // word_cnt_div_n_frac: Used to increment word_cnt_div_n. Can be + // thought of as the fractional part of + // word_cnt_div_n. + // in_pkt_cnt: Similar to in_word_cnt, but for packets. Used + // to determine when a group of N packets has been + // received to store the next header. + // first_header: We only use the header from the first packet in + // a group of N packets (this greatly reduces + // the header FIFO size). + RECV : begin + // If rate changed, force a EOB. + if (m_changed) begin + rate_changed <= 1'b1; + end + if (i_reg_tvalid & i_reg_tready) begin + // Track the number of words sent to the user divided by N. 
+ // At the end of a burst, this value is forwarded to the output + // state machine and used to determine when the final sample has + // arrived from the user code. + if (word_cnt_div_n_frac == n) begin + word_cnt_div_n <= word_cnt_div_n + 1; + word_cnt_div_n_frac <= 1; + end else begin + word_cnt_div_n_frac <= word_cnt_div_n_frac + 1; + end + // Use payload length from first packet + first_header <= 1'b0; + if (first_header) begin + payload_length_out <= payload_length_in; + end else if (MAXIMIZE_OUTPUT_PKT_LEN) begin + if (payload_length_out < payload_length_in) begin + payload_length_out <= payload_length_in; + end + end + // Track when at least N input samples have been received in this burst + if (partial_first_word & (word_cnt_div_n_frac == n)) begin + partial_first_word <= 1'b0; + end + // Burst ended before we received enough samples to form + // at least one full output sample. + // Note: axi_drop_partial_packet automatically handles + // dropping the partial sample. + if (i_reg_tlast & eob_in & partial_first_word) begin + input_state <= RECV_INIT; + end else begin + if (i_reg_tlast) begin + // At the end of a burst, forward the number of words divided by N to + // the output state machine via a FIFO. This allows the output state + // machine to know when it has received the final output word. + // We use a FIFO in case the bursts are very small and we + // need to store several of these values. + if (eob_in) begin + word_cnt_div_n_tdata <= word_cnt_div_n + (word_cnt_div_n_frac == n); + word_cnt_div_n_tvalid <= 1'b1; + throttle <= 1'b1; + if (enable_clear_user) begin + input_state <= RECV_WAIT_FOR_SEND_DONE; + end else begin + input_state <= RECV_INIT; + end + end + end + end + end + end + // Wait until last sample has been output and user logic is cleared + // WARNING: This can be a huge bubble state! However, since it only happens with + // EOBs, it should be infrequent. 
+ RECV_WAIT_FOR_SEND_DONE : begin + if (send_done) begin + input_state <= RECV_INIT; + end + end + default : begin + input_state <= RECV_INIT; + end + endcase + end + end + + assign clear_user = send_done & enable_clear_user; + + /******************************************************** + ** AXI Drop Partial Packet (to user) + ** - Enforces sending integer multiple of N samples + ** to user + ********************************************************/ + axi_drop_partial_packet #( + .WIDTH(WIDTH+1), + .HOLD_LAST_WORD(1), + .MAX_PKT_SIZE(MAX_N), + .SR_PKT_SIZE_ADDR(SR_N_ADDR)) + axi_drop_partial_packet ( + .clk(clk), .reset(reset), .clear(clear | send_done), + .flush(word_cnt_div_n_tvalid & word_cnt_div_n_tready), // Flush on EOB + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .i_tdata({i_reg_tlast,i_reg_tdata}), .i_tvalid(i_reg_tvalid), .i_tlast(i_reg_tlast_int), .i_tready(i_reg_tready_int), + .o_tdata({m_axis_data_tlast,m_axis_data_tdata}), .o_tvalid(m_axis_data_tvalid), .o_tlast(/* Unused */), .o_tready(m_axis_data_tready)); + + /******************************************************** + ** Output state machine + ********************************************************/ + reg [1:0] output_state; + localparam SEND_INIT = 0; + localparam SEND = 1; + + wire [WIDTH-1:0] o_reg_tdata; + wire [127:0] o_reg_tuser; + wire o_reg_tvalid, o_reg_tready, o_reg_tlast, o_reg_tlast_int; + + reg [15:0] out_payload_cnt = (WIDTH/8); + reg [15:0] word_cnt_div_m; + reg [$clog2(MAX_M+1)-1:0] word_cnt_div_m_frac = 1; + reg [$clog2(MAX_M+1)-1:0] out_pkt_cnt = 1; + + // End of burst tracking. Compare the number of words sent to the user divided by N + // to the number of words received from the user divided by M. When they equal each other + // then we have received the last word from the user in this burst. + // Note: Using word_cnt_div_n_fifo_tdata to make sure the last word is identified before + // it has been consumed. 
+ wire last_word_in_burst = word_cnt_div_n_fifo_tvalid & + (word_cnt_div_m == word_cnt_div_n_fifo_tdata) & + (word_cnt_div_m_frac == m); + + always @(posedge clk) begin + if (reset | clear) begin + word_cnt_div_m <= 1; + word_cnt_div_m_frac <= 1; + out_payload_cnt <= (WIDTH/8); + send_done <= 1'b0; + output_state <= SEND_INIT; + end else begin + // Track + case (output_state) + SEND_INIT : begin + word_cnt_div_m <= 1; + word_cnt_div_m_frac <= 1; + out_payload_cnt <= (WIDTH/8); + send_done <= 1'b0; + output_state <= SEND; + end + SEND : begin + if (o_reg_tvalid & o_reg_tready) begin + if (o_reg_tlast) begin + // Track number of samples from user to set tlast + out_payload_cnt <= (WIDTH/8); + end else begin + out_payload_cnt <= out_payload_cnt + (WIDTH/8); + end + // Track number of words consumed divided by M. This is used + // in conjunction with word_cnt_div_n to determine when we have received + // the last word in a burst from the user. + if (word_cnt_div_m_frac == m) begin + word_cnt_div_m <= word_cnt_div_m + 1; + word_cnt_div_m_frac <= 1; + end else begin + word_cnt_div_m_frac <= word_cnt_div_m_frac + 1; + end + if (last_word_in_burst) begin + send_done <= 1'b1; + output_state <= SEND_INIT; + end + end + end + default : begin + output_state <= SEND_INIT; + end + endcase + end + end + + // Only pop this FIFO at EOB. 
+ assign word_cnt_div_n_fifo_tready = o_reg_tvalid & o_reg_tready & last_word_in_burst; + + /******************************************************** + ** Adjust VITA time + ********************************************************/ + localparam VT_INIT = 0; + localparam VT_INCREMENT = 1; + reg vt_state; + + reg has_time_out, has_time_clear; + reg [63:0] vita_time_out, vita_time_accum; + + always @(posedge clk) begin + if (reset | clear) begin + vt_state <= VT_INIT; + end else begin + case (vt_state) + VT_INIT : begin + has_time_clear <= 1'b0; + if (i_reg_tvalid & i_reg_tready & first_header) begin + vita_time_out <= vita_time_in; + vita_time_accum <= vita_time_in; + has_time_out <= has_time_in; + vt_state <= VT_INCREMENT; + end + end + VT_INCREMENT : begin + // Stop sending vita time if user does not send vita time + if (i_reg_tvalid & ~has_time_in) begin + has_time_clear <= 1'b1; + end + if (o_reg_tvalid & o_reg_tready) begin + if (o_reg_tlast) begin + if (has_time_clear) begin + has_time_out <= 1'b0; + end + vita_time_out <= vita_time_accum + n; + end + vita_time_accum <= vita_time_accum + n; + if (last_word_in_burst) begin + vt_state <= VT_INIT; + end + end + end + default : begin + vt_state <= VT_INIT; + end + endcase + end + end + + // Create output header + cvita_hdr_encoder cvita_hdr_encoder ( + .pkt_type(2'd0), .eob(last_word_in_burst), .has_time(has_time_out), + .seqnum(12'd0), .payload_length(16'd0), // Not needed, handled by AXI Wrapper + .src_sid(src_sid), .dst_sid(dst_sid), + .vita_time(vita_time_out), + .header(o_reg_tuser)); + + /******************************************************** + ** Register input stream from user and output stream + ********************************************************/ + assign o_reg_tlast = o_reg_tlast_int | + // End of packet + (out_payload_cnt == payload_length_out) | + // EOB, could be a partial packet + last_word_in_burst; + + axi_fifo_flop #(.WIDTH(WIDTH+1)) axi_fifo_flop_from_user_0 ( + .clk(clk), .reset(reset), 
.clear(clear), + // FIXME: If user asserts tlast at the wrong time, it likely causes a deadlock. For now ignore tlast. + //.i_tdata({s_axis_data_tlast,s_axis_data_tdata}), .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready), + .i_tdata({1'b0,s_axis_data_tdata}), .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready), + .o_tdata({o_reg_tlast_int,o_reg_tdata}), .o_tvalid(o_reg_tvalid), .o_tready(o_reg_tready), + .space(), .occupied()); + + // Output register with header + axi_fifo_flop #(.WIDTH(WIDTH+1+128)) axi_fifo_flop_output ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({o_reg_tlast,o_reg_tdata,o_reg_tuser}), .i_tvalid(o_reg_tvalid), .i_tready(o_reg_tready), + .o_tdata({o_tlast,o_tdata,o_tuser}), .o_tvalid(o_tvalid), .o_tready(o_tready), + .space(), .occupied()); + + /******************************************************** + ** Error / warning signals + ********************************************************/ + reg [23:0] counter_header_fifo_full, counter_throttle, counter_drop_pkt_lockup; + reg [2:0] counter_header_fifo_empty; + always @(posedge clk) begin + if (reset) begin + warning_long_throttle <= 1'b0; + error_extra_outputs <= 1'b0; + error_drop_pkt_lockup <= 1'b0; + counter_throttle <= 0; + counter_header_fifo_full <= 0; + counter_drop_pkt_lockup <= 0; + counter_header_fifo_empty <= 0; + end else begin + // In throttle state for a "long" time + if (throttle) begin + counter_throttle <= counter_throttle + 1; + if (counter_throttle == 2**24-1) begin + warning_long_throttle <= 1'b1; + end + end else begin + counter_throttle <= 0; + end + // More than M outputs per N inputs + if (word_cnt_div_n_fifo_tvalid & (word_cnt_div_m > word_cnt_div_n_fifo_tdata)) begin + error_extra_outputs <= 1'b1; + end + // Bad internal state. AXI drop partial packet is in a lockup condition. 
+ if (~i_reg_tready_int & m_axis_data_tready) begin + counter_drop_pkt_lockup <= counter_drop_pkt_lockup + 1; + if (counter_drop_pkt_lockup == 2**24-1) begin + error_drop_pkt_lockup <= 1'b1; + end + end else begin + counter_drop_pkt_lockup <= 0; + end + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/axi_repeat.v b/fpga/usrp3/lib/rfnoc/axi_repeat.v new file mode 100644 index 000000000..c19d6962a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_repeat.v @@ -0,0 +1,31 @@ +// +// Copyright 2015 Ettus Research +// +// Output is valid once the first valid input word has arrived (o_tvalid is cleared only by reset) and repeats the last valid i_tdata & i_tlast value. The input is always accepted; o_tready is not used by this module. + +module axi_repeat +#( + parameter WIDTH = 16) +( + input clk, input reset, + input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output reg [WIDTH-1:0] o_tdata, output reg o_tlast, output reg o_tvalid, input o_tready +); + + assign i_tready = 1'b1; // never stall the upstream source + + always @(posedge clk) begin + if (reset) begin + o_tdata <= 'd0; + o_tlast <= 'd0; + o_tvalid <= 'd0; + end else begin + if (i_tvalid) begin // latch every valid input word and hold it until the next one arrives + o_tvalid <= 1'b1; + o_tlast <= i_tlast; + o_tdata <= i_tdata; // capture the input word (previously re-latched o_tdata itself, so the data never changed) + end + end + end + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/axi_round.v b/fpga/usrp3/lib/rfnoc/axi_round.v new file mode 100644 index 000000000..75a343ba5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_round.v @@ -0,0 +1,56 @@ +// +// Copyright 2016 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Reduces an AXI-Stream word from WIDTH_IN to WIDTH_OUT bits by dropping the low WIDTH_IN-WIDTH_OUT bits and adding a one-bit rounding correction. Mode priority is round_to_nearest, then trunc, then round_to_zero. +module axi_round + #(parameter WIDTH_IN=17, + parameter WIDTH_OUT=16, + parameter round_to_zero=0, // original behavior + parameter round_to_nearest=1, // lowest noise + parameter trunc=0, // round to negative infinity + parameter FIFOSIZE=0) // leave at 0 for a normal single flop + (input clk, input reset, + input [WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + wire [WIDTH_OUT-1:0] out; // rounded value fed to the output FIFO (not driven when the widths are equal) + + generate + if (WIDTH_IN == WIDTH_OUT) begin // nothing to drop: combinational pass-through, no FIFO + assign o_tdata = i_tdata; + assign o_tlast = i_tlast; + assign o_tvalid = i_tvalid; + assign i_tready = o_tready; + end else begin + wire round_corr,round_corr_trunc,round_corr_rtz,round_corr_nearest,round_corr_nearest_safe; + wire [WIDTH_IN-WIDTH_OUT-1:0] err; + // Candidate correction bits, one per rounding mode + assign round_corr_trunc = 0; + assign round_corr_rtz = (i_tdata[WIDTH_IN-1] & |i_tdata[WIDTH_IN-WIDTH_OUT-1:0]); // top (sign) bit set and at least one dropped bit set + assign round_corr_nearest = i_tdata[WIDTH_IN-WIDTH_OUT-1]; // most significant dropped bit + // When more than one bit is dropped, suppress the increment if the top bit is 0 and all other kept bits are 1 (adding 1 would wrap the kept value) + assign round_corr_nearest_safe = (WIDTH_IN-WIDTH_OUT > 1) ? + ((~i_tdata[WIDTH_IN-1] & (&i_tdata[WIDTH_IN-2:WIDTH_IN-WIDTH_OUT])) ? 1'b0 : round_corr_nearest) : + round_corr_nearest; + + assign round_corr = round_to_nearest ? round_corr_nearest_safe : + trunc ? round_corr_trunc : + round_to_zero ? 
round_corr_rtz : + 0; // default to trunc + + assign out = i_tdata[WIDTH_IN-1:WIDTH_IN-WIDTH_OUT] + round_corr; + + assign err = i_tdata - {out,{(WIDTH_IN-WIDTH_OUT){1'b0}}}; // input minus the rounded value; not used elsewhere in this module + + axi_fifo #(.WIDTH(WIDTH_OUT+1), .SIZE(FIFOSIZE)) flop + (.clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({i_tlast, out}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready), + .occupied(), .space()); + + end + endgenerate + +endmodule // axi_round diff --git a/fpga/usrp3/lib/rfnoc/axi_round_and_clip.v b/fpga/usrp3/lib/rfnoc/axi_round_and_clip.v new file mode 100644 index 000000000..781e85ec3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_round_and_clip.v @@ -0,0 +1,69 @@ +// +// Copyright 2014, Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// +// Reduce the bitwidth of an input stream. This module will both round +// and clip, meaning that the lower bits will be removed by rounding to +// the nearest value, and the top bits will be snipped (but without +// causing numerical overflows). +// +// Example: If WIDTH_IN==24, WIDTH_OUT==16, and CLIP_BITS==3, the output +// will remove the top 3 bits (by clipping), and remove the bottom 5 +// bits by rounding, leaving 24 - 3 - 5 == 16 bits. +// +// Note that this module has two stages (round, then clip) which will +// both have a FIFO of length FIFOSIZE. However, when a stage is not +// required it will have *no* FIFO, but instead just pass through the +// data. In the extreme case where WIDTH_IN==WIDTH_OUT and CLIP_BITS==0, +// there are no FIFOs and this module becomes an AXI stream passthrough. 
+module axi_round_and_clip +#( + parameter WIDTH_IN=24, + parameter WIDTH_OUT=16, + parameter CLIP_BITS=3, + parameter FIFOSIZE=1) // FIFOSIZE = 1, single output register +( + input clk, input reset, + input [WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready +); + // Stream between the round stage and the clip stage (WIDTH_OUT+CLIP_BITS bits wide) + wire [WIDTH_OUT+CLIP_BITS-1:0] int_tdata; + wire int_tlast, int_tvalid, int_tready; + + generate + if (WIDTH_IN == WIDTH_OUT+CLIP_BITS) begin // no low bits to remove: bypass the round stage + assign int_tdata = i_tdata; + assign int_tlast = i_tlast; + assign int_tvalid = i_tvalid; + assign i_tready = int_tready; + end else begin + axi_round #( + .WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT+CLIP_BITS), + .round_to_nearest(1), .FIFOSIZE(FIFOSIZE)) + axi_round ( + .clk(clk), .reset(reset), + .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata(int_tdata), .o_tlast(int_tlast), .o_tvalid(int_tvalid), .o_tready(int_tready)); + end + // Clip stage: replaced by a plain pass-through when CLIP_BITS == 0 + if (CLIP_BITS == 0) begin + assign o_tdata = int_tdata; + assign o_tlast = int_tlast; + assign o_tvalid = int_tvalid; + assign int_tready = o_tready; + end else begin + axi_clip #( + .WIDTH_IN(WIDTH_OUT+CLIP_BITS), .WIDTH_OUT(WIDTH_OUT), + .FIFOSIZE(FIFOSIZE)) + axi_clip ( + .clk(clk), .reset(reset), + .i_tdata(int_tdata), .i_tlast(int_tlast), .i_tvalid(int_tvalid), .i_tready(int_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready)); + end + endgenerate + +endmodule // axi_round_and_clip diff --git a/fpga/usrp3/lib/rfnoc/axi_round_and_clip_complex.v b/fpga/usrp3/lib/rfnoc/axi_round_and_clip_complex.v new file mode 100644 index 000000000..c3e446509 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_round_and_clip_complex.v @@ -0,0 +1,43 @@ + + +// Copyright 2014, Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// Applies axi_round_and_clip independently to the two WIDTH_IN-bit streams produced by split_complex and recombines the two results with join_complex. +module axi_round_and_clip_complex + #(parameter WIDTH_IN=24, + 
parameter WIDTH_OUT=16, + parameter CLIP_BITS=3, + parameter FIFOSIZE=1) // FIFOSIZE = 1, single output register + (input clk, input reset, + input [2*WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [2*WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + // Full-width component streams coming out of split_complex + wire [WIDTH_IN-1:0] ii_tdata, iq_tdata; + wire ii_tlast, ii_tvalid, ii_tready, iq_tlast, iq_tvalid, iq_tready; + // Reduced-width component streams going into join_complex + wire [WIDTH_OUT-1:0] oi_tdata, oq_tdata; + wire oi_tlast, oi_tvalid, oi_tready, oq_tlast, oq_tvalid, oq_tready; + + split_complex #(.WIDTH(WIDTH_IN)) split + (.i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .oi_tdata(ii_tdata), .oi_tlast(ii_tlast), .oi_tvalid(ii_tvalid), .oi_tready(ii_tready), + .oq_tdata(iq_tdata), .oq_tlast(iq_tlast), .oq_tvalid(iq_tvalid), .oq_tready(iq_tready)); + + axi_round_and_clip #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .CLIP_BITS(CLIP_BITS), .FIFOSIZE(FIFOSIZE)) r_and_c_i + (.clk(clk), .reset(reset), + .i_tdata(ii_tdata), .i_tlast(ii_tlast), .i_tvalid(ii_tvalid), .i_tready(ii_tready), + .o_tdata(oi_tdata), .o_tlast(oi_tlast), .o_tvalid(oi_tvalid), .o_tready(oi_tready)); + + axi_round_and_clip #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .CLIP_BITS(CLIP_BITS), .FIFOSIZE(FIFOSIZE)) r_and_c_q + (.clk(clk), .reset(reset), + .i_tdata(iq_tdata), .i_tlast(iq_tlast), .i_tvalid(iq_tvalid), .i_tready(iq_tready), + .o_tdata(oq_tdata), .o_tlast(oq_tlast), .o_tvalid(oq_tvalid), .o_tready(oq_tready)); + + join_complex #(.WIDTH(WIDTH_OUT)) join_complex + (.ii_tdata(oi_tdata), .ii_tlast(oi_tlast), .ii_tvalid(oi_tvalid), .ii_tready(oi_tready), + .iq_tdata(oq_tdata), .iq_tlast(oq_tlast), .iq_tvalid(oq_tvalid), .iq_tready(oq_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready)); + +endmodule // axi_round_and_clip_complex diff --git a/fpga/usrp3/lib/rfnoc/axi_round_complex.v b/fpga/usrp3/lib/rfnoc/axi_round_complex.v new file mode 100644 index 000000000..d108c6abd --- 
/dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_round_complex.v @@ -0,0 +1,43 @@ +// +// Copyright 2016 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Rounds both components of a complex stream from WIDTH_IN to WIDTH_OUT bits using two axi_round instances (rounding-mode parameters left at their defaults). +module axi_round_complex #( + parameter WIDTH_IN = 24, + parameter WIDTH_OUT = 16, + parameter FIFOSIZE = 0) // leave at 0 for a normal single flop +( + input clk, input reset, + input [2*WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [2*WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready +); + // Full-width component streams coming out of split_complex + wire [WIDTH_IN-1:0] ii_tdata, iq_tdata; + wire ii_tlast, ii_tvalid, ii_tready, iq_tlast, iq_tvalid, iq_tready; + // Rounded component streams going into join_complex + wire [WIDTH_OUT-1:0] oi_tdata, oq_tdata; + wire oi_tlast, oi_tvalid, oi_tready, oq_tlast, oq_tvalid, oq_tready; + + split_complex #(.WIDTH(WIDTH_IN)) split_complex ( + .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .oi_tdata(ii_tdata), .oi_tlast(ii_tlast), .oi_tvalid(ii_tvalid), .oi_tready(ii_tready), + .oq_tdata(iq_tdata), .oq_tlast(iq_tlast), .oq_tvalid(iq_tvalid), .oq_tready(iq_tready)); + + axi_round #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .FIFOSIZE(FIFOSIZE)) axi_round_i ( + .clk(clk), .reset(reset), + .i_tdata(ii_tdata), .i_tlast(ii_tlast), .i_tvalid(ii_tvalid), .i_tready(ii_tready), + .o_tdata(oi_tdata), .o_tlast(oi_tlast), .o_tvalid(oi_tvalid), .o_tready(oi_tready)); + + axi_round #(.WIDTH_IN(WIDTH_IN), .WIDTH_OUT(WIDTH_OUT), .FIFOSIZE(FIFOSIZE)) axi_round_q ( + .clk(clk), .reset(reset), + .i_tdata(iq_tdata), .i_tlast(iq_tlast), .i_tvalid(iq_tvalid), .i_tready(iq_tready), + .o_tdata(oq_tdata), .o_tlast(oq_tlast), .o_tvalid(oq_tvalid), .o_tready(oq_tready)); + + join_complex #(.WIDTH(WIDTH_OUT)) join_complex ( + .ii_tdata(oi_tdata), .ii_tlast(oi_tlast), .ii_tvalid(oi_tvalid), .ii_tready(oi_tready), + .iq_tdata(oq_tdata), .iq_tlast(oq_tlast), .iq_tvalid(oq_tvalid), .iq_tready(oq_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), 
.o_tready(o_tready)); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/axi_serializer.v b/fpga/usrp3/lib/rfnoc/axi_serializer.v new file mode 100644 index 000000000..f71a54767 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_serializer.v @@ -0,0 +1,72 @@ +// +// Copyright 2015 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Converts each WIDTH-bit input word into WIDTH single-bit output beats. Bits are sent MSB first, or LSB first when reverse_input is set. +module axi_serializer #( + parameter WIDTH = 32) +( + input clk, input rst, input reverse_input, + input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output reg i_tready, + output reg o_tdata, output reg o_tlast, output reg o_tvalid, input o_tready +); + + reg i_tlast_latch; // i_tlast captured together with the word held in serial_data_reg + reg [WIDTH-1:0] serial_data_reg; + reg [$clog2(WIDTH)-1:0] serial_cnt; // number of bits of the current word already shifted out + reg serializing; // set while serial_data_reg holds a word being shifted out + + always @(posedge clk) begin + if (rst) begin + i_tready <= 1'b0; + i_tlast_latch <= 1'b0; + o_tdata <= 1'b0; + o_tlast <= 1'b0; + o_tvalid <= 1'b0; + serial_data_reg <= 'd0; + serializing <= 1'b0; + serial_cnt <= 0; + end else begin + i_tready <= 1'b0; // default; later assignments in this block override it + // Shift out a bit when downstream can consume it + if (serializing & o_tready) begin + o_tvalid <= 1'b1; + if (reverse_input) begin + o_tdata <= serial_data_reg[0]; + serial_data_reg[WIDTH-2:0] <= serial_data_reg[WIDTH-1:1]; + end else begin + o_tdata <= serial_data_reg[WIDTH-1]; + serial_data_reg[WIDTH-1:1] <= serial_data_reg[WIDTH-2:0]; + end + if (serial_cnt == WIDTH-1) begin // final bit of the word: reload for the next word + serial_cnt <= 0; + serial_data_reg <= i_tdata; // later nonblocking assignment, so it takes precedence over the shift above + i_tlast_latch <= i_tlast; + o_tlast <= i_tlast_latch; // NOTE(review): o_tlast is written only here and in reset, so it keeps this value during the first WIDTH-1 bits of the following word - confirm this is intended + if (~i_tvalid) begin + serializing <= 1'b0; + end else begin + i_tready <= 1'b1; + end + end else begin + serial_cnt <= serial_cnt + 1; + end + end else if (~serializing) begin + i_tready <= 1'b1; + if (o_tvalid && o_tready) begin + o_tvalid <= 1'b0; + end + // Serial shift register (serial_data_reg) is empty, load it + if (i_tvalid) begin + i_tready <= 1'b0; + serializing <= 1'b1; + i_tlast_latch <= i_tlast; + serial_data_reg <= i_tdata; + end + end + end 
+ end + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/axi_sync.v b/fpga/usrp3/lib/rfnoc/axi_sync.v new file mode 100644 index 000000000..a881a5556 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_sync.v @@ -0,0 +1,63 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Synchronizes AXI stream buses so data is released on every port simultaneously. +// +// Note: If inputs have unequal bitwidths, use WIDTH_VEC instead of WIDTH to define +// the individual bit widths. Each bit width occupies one 32-bit slot of WIDTH_VEC +// (a vector of width 32*SIZE); only the low 8 bits of each slot are read. +// + +module axi_sync #( + parameter SIZE = 2, + parameter WIDTH = 32, + parameter [32*SIZE-1:0] WIDTH_VEC = {SIZE{WIDTH[31:0]}}, + parameter FIFO_SIZE = 0 +)( + input clk, input reset, input clear, + input [msb(SIZE,WIDTH_VEC)-1:0] i_tdata, input [SIZE-1:0] i_tlast, input [SIZE-1:0] i_tvalid, output [SIZE-1:0] i_tready, + output [msb(SIZE,WIDTH_VEC)-1:0] o_tdata, output [SIZE-1:0] o_tlast, output [SIZE-1:0] o_tvalid, input [SIZE-1:0] o_tready +); + + // Helper function returning the summed widths of WIDTH_VEC entries 0..n, i.e. the MSB index of entry n plus one. 
+ // Note: If n is negative, returns 0 + function automatic integer msb(input integer n, input [SIZE*32-1:0] bit_vec); + integer i, total; + begin + total = 0; + if (n >= 0) begin + for (i = 0; i <= n; i = i + 1) begin + total = total + ((bit_vec >> 32*i) & 32'hFF); // low 8 bits of slot i + end + end + msb = total; + end + endfunction + + wire [msb(SIZE,WIDTH_VEC)-1:0] int_tdata; + wire [SIZE-1:0] int_tlast, int_tvalid, int_tready; + + genvar i; + generate + for (i = 0; i < SIZE; i = i + 1) begin // one FIFO per port; tlast is stored alongside that port's data slice + axi_fifo #(.WIDTH(msb(i,WIDTH_VEC)-msb(i-1,WIDTH_VEC)+1), .SIZE(FIFO_SIZE)) axi_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast[i],i_tdata[msb(i,WIDTH_VEC)-1:msb(i-1,WIDTH_VEC)]}), + .i_tvalid(i_tvalid[i]), .i_tready(i_tready[i]), + .o_tdata({int_tlast[i],int_tdata[msb(i,WIDTH_VEC)-1:msb(i-1,WIDTH_VEC)]}), + .o_tvalid(int_tvalid[i]), .o_tready(int_tready[i]), + .space(), .occupied()); + end + endgenerate + + assign o_tdata = int_tdata; + assign o_tlast = int_tlast; + // Release a word on every port only when all FIFOs have data and all outputs are ready (o_tvalid therefore depends on o_tready) + wire consume = (&int_tvalid) & (&o_tready); + assign int_tready = {SIZE{consume}}; + assign o_tvalid = {SIZE{consume}}; + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/axi_tag_time.v b/fpga/usrp3/lib/rfnoc/axi_tag_time.v new file mode 100644 index 000000000..35698ef72 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_tag_time.v @@ -0,0 +1,140 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// - When the user executes a timed settings bus command, +// this module will tag the sample (on m_axis_data_tag) +// that the command should apply on. +// - Order of operation: +// 1) Receives settings bus command +// a) If the command has no time (in_set_has_time = 0), output on non-timed settings bus +// b) If the command has a time (in_set_has_time = 1), output on timed settings bus and store time in FIFO +// It is assumed the user will use timed_settings_bus.v implementation. +// 2) When the tracked sample time reaches the stored command time, output the stored tag bits on m_axis_data_tag and pop the FIFO + +module axi_tag_time #( + parameter WIDTH = 32, + parameter HEADER_WIDTH = 128, + parameter SR_AWIDTH = 8, + parameter SR_DWIDTH = 32, + parameter SR_TWIDTH = 64, + parameter NUM_TAGS = 1, + parameter [NUM_TAGS*SR_AWIDTH-1:0] SR_TAG_ADDRS = 0, + parameter CMD_FIFO_SIZE = 5, + parameter MAX_TICK_RATE = 2**16-1 +)( + input clk, + input reset, + input clear, + input [$clog2(MAX_TICK_RATE)-1:0] tick_rate, + output timed_cmd_fifo_full, + // From AXI Wrapper + input [WIDTH-1:0] s_axis_data_tdata, + input [HEADER_WIDTH-1:0] s_axis_data_tuser, + input s_axis_data_tlast, + input s_axis_data_tvalid, + output s_axis_data_tready, + // To user + output [WIDTH-1:0] m_axis_data_tdata, + output [HEADER_WIDTH-1:0] m_axis_data_tuser, + output [NUM_TAGS-1:0] m_axis_data_tag, + output m_axis_data_tlast, + output m_axis_data_tvalid, + input m_axis_data_tready, + // Settings bus from Noc Shell + input in_set_stb, + input [SR_AWIDTH-1:0] in_set_addr, + input [SR_DWIDTH-1:0] in_set_data, + input [SR_TWIDTH-1:0] in_set_time, + input in_set_has_time, + // Non-timed settings bus to user + output out_set_stb, + output [SR_AWIDTH-1:0] out_set_addr, + output [SR_DWIDTH-1:0] 
out_set_data, + // Timed settings bus to user + output timed_set_stb, + output [SR_AWIDTH-1:0] timed_set_addr, + output [SR_DWIDTH-1:0] timed_set_data +); + // Address and data are forwarded to both buses; in_set_has_time selects which strobe fires + assign out_set_addr = in_set_addr; + assign out_set_data = in_set_data; + assign out_set_stb = in_set_stb & ~in_set_has_time; + assign timed_set_addr = in_set_addr; + assign timed_set_data = in_set_data; + assign timed_set_stb = in_set_stb & in_set_has_time; + + // Extract vita time from tuser + wire [63:0] vita_time_in; + cvita_hdr_decoder cvita_hdr_decoder_in ( + .header(s_axis_data_tuser), + .pkt_type(), .eob(), .has_time(), + .seqnum(), .length(), .payload_length(), + .src_sid(), .dst_sid(), + .vita_time(vita_time_in)); + + // Track time: load the header time on the first word of each packet, then add tick_rate for every further word accepted + reg header_valid = 1'b1; + reg [63:0] vita_time_now = 64'd0, set_time_hold = 64'd0; // NOTE(review): set_time_hold is not referenced anywhere else in this module + always @(posedge clk) begin + if (reset | clear) begin + header_valid <= 1'b1; + end else begin + if (s_axis_data_tvalid & s_axis_data_tready) begin + if (s_axis_data_tlast) begin + header_valid <= 1'b1; + end else begin + header_valid <= 1'b0; + end + if (header_valid) begin + vita_time_now <= vita_time_in; + end else begin + vita_time_now <= vita_time_now + tick_rate; + end + end + end + end + // One tag bit per SR_TAG_ADDRS entry, set when the command address equals that entry + genvar i; + wire [NUM_TAGS-1:0] tags; + generate + for (i = 0; i < NUM_TAGS; i = i + 1) begin + assign tags[i] = (in_set_addr == SR_TAG_ADDRS[SR_AWIDTH*(i+1)-1:SR_AWIDTH*i]); + end + endgenerate + + // FIFO to hold tags + times + wire [SR_TWIDTH-1:0] fifo_set_time; + wire [NUM_TAGS-1:0] fifo_tags; + wire fifo_tvalid, fifo_tready; + wire timed_cmd_fifo_full_n; + axi_fifo #(.WIDTH(SR_TWIDTH+NUM_TAGS), .SIZE(CMD_FIFO_SIZE)) axi_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({in_set_time,tags}), .i_tvalid(timed_set_stb), .i_tready(timed_cmd_fifo_full_n), + .o_tdata({fifo_set_time,fifo_tags}), .o_tvalid(fifo_tvalid), .o_tready(fifo_tready), + .space(), .occupied()); + + // Extract has time from tuser + wire has_time; + cvita_hdr_decoder cvita_hdr_decoder_out ( + 
.header(m_axis_data_tuser), + .pkt_type(), .eob(), .has_time(has_time), + .seqnum(), .length(), .payload_length(), + .src_sid(), .dst_sid(), + .vita_time()); + + assign timed_cmd_fifo_full = ~timed_cmd_fifo_full_n; + assign fifo_tready = m_axis_data_tvalid & m_axis_data_tready & fifo_tvalid & has_time & (vita_time_now >= fifo_set_time); + assign in_rb_stb = fifo_tready; // NOTE(review): in_rb_stb is not declared in this module (implicit net) and is not read here - confirm whether it is needed + + // Need a single cycle delay to allow vita_time_now to update at the start of a new packet + axi_fifo_flop #(.WIDTH(WIDTH+HEADER_WIDTH+1)) axi_fifo_flop ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({s_axis_data_tdata,s_axis_data_tuser,s_axis_data_tlast}), .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready), + .o_tdata({m_axis_data_tdata,m_axis_data_tuser,m_axis_data_tlast}), .o_tvalid(m_axis_data_tvalid), .o_tready(m_axis_data_tready)); + // Present the stored tag bits once the tracked time has reached the command time (same condition that pops the FIFO, without the data handshake) + assign m_axis_data_tag = ((vita_time_now >= fifo_set_time) & fifo_tvalid & has_time) ? fifo_tags : 'd0; + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/axi_wrapper.v b/fpga/usrp3/lib/rfnoc/axi_wrapper.v new file mode 100644 index 000000000..f93a693b8 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/axi_wrapper.v @@ -0,0 +1,217 @@ +// +// Copyright 2015 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Assumes 32-bit elements (such as sc16) carried over AXI-Stream +// SIMPLE_MODE -- Automatically handle header (s_axis_data_tuser), packets must be consumed / produced 1-to-1 +// RESIZE_INPUT_PACKET -- Resize input packets. m_axis_data_tlast will be based on m_axis_pkt_len_tdata. Otherwise packet length based on actual input packet length (via i_tlast). +// RESIZE_OUTPUT_PACKET -- Resize output packets. s_axis_data_tlast will be ignored and instead use packet length in s_axis_tuser_data. Otherwise use s_axis_data_tlast. +// +// Note: When SIMPLE_MODE = 1 and RESIZE_OUTPUT_PACKET = 1, s_axis_data_tlast is ignored and output packets are sized according to the length +// of the input packet (via the packet length field in the received header). Useful if the user design wants output packet length to +// match the input packet length without having to drive s_axis_data_tlast. +// +// *** Warning: Care should be taken when using RESIZE_INPUT_PACKET and/or RESIZE_OUTPUT_PACKET along with SIMPLE_MODE +// as issues could arise if packets are not produced / consumed in a 1:1 ratio. For instance, the header +// FIFO could overflow or underflow. + +// _tuser bit definitions +// [127:64] == CHDR header +// [127:126] == Packet type -- 00 for data, 01 for flow control, 10 for command, 11 for response +// [125] == Has time? 
(0 for no, 1 for time field on next line) +// [124] == EOB (end of burst indicator) +// [123:112] == 12-bit sequence number +// [111: 96] == 16-bit length in bytes +// [ 95: 80] == SRC SID (stream ID) +// [ 79: 64] == DST SID +// [ 63: 0] == timestamp + +module axi_wrapper + #(parameter MTU=10, + parameter SR_AXI_CONFIG_BASE=129, // AXI configuration bus base, settings bus address range size is 2*NUM_AXI_CONFIG_BUS + parameter NUM_AXI_CONFIG_BUS=1, // Number of AXI configuration buses + parameter CONFIG_BUS_FIFO_DEPTH=1, // Depth of AXI configuration bus FIFO. Note: AXI configuration bus lacks back pressure. + parameter SIMPLE_MODE=1, // 0 = User handles CHDR insertion via tuser signals, 1 = Automatically save / insert CHDR with internal FIFO + parameter USE_SEQ_NUM=0, // 0 = Framer will automatically handle sequence number, 1 = Use sequence number provided in s_axis_data_tuser + parameter RESIZE_INPUT_PACKET=0, // 0 = Do not resize, packet length determined by i_tlast, 1 = Generate m_axis_data_tlast based on user input m_axis_pkt_len_tdata + parameter RESIZE_OUTPUT_PACKET=0, // 0 = Do not resize, packet length determined by s_axis_data_tlast, 1 = Use packet length from user header (s_axis_data_tuser) + parameter WIDTH=32) // Specify the output width for the AXI stream data (can be 32 or 64) + (input clk, input reset, + input bus_clk, input bus_rst, + + input clear_tx_seqnum, + input [15:0] next_dst, // Used with SIMPLE_MODE=1 + + // To NoC Shell + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input [63:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready, + + // To AXI IP + output [WIDTH-1:0] m_axis_data_tdata, output [127:0] m_axis_data_tuser, output m_axis_data_tlast, output m_axis_data_tvalid, input m_axis_data_tready, + input [WIDTH-1:0] s_axis_data_tdata, input [127:0] s_axis_data_tuser, input s_axis_data_tlast, input s_axis_data_tvalid, output s_axis_data_tready, +
input [15:0] m_axis_pkt_len_tdata, input m_axis_pkt_len_tvalid, output m_axis_pkt_len_tready, // Used when RESIZE_INPUT_PACKET=1 + + // Variable number of AXI configuration buses + output [NUM_AXI_CONFIG_BUS*32-1:0] m_axis_config_tdata, + output [NUM_AXI_CONFIG_BUS-1:0] m_axis_config_tlast, + output [NUM_AXI_CONFIG_BUS-1:0] m_axis_config_tvalid, + input [NUM_AXI_CONFIG_BUS-1:0] m_axis_config_tready + ); + + + wire clear_tx_seqnum_bclk; + pulse_synchronizer clear_tx_seqnum_sync_i ( + .clk_a(clk), .rst_a(reset), .pulse_a(clear_tx_seqnum), .busy_a(/*Ignored: Pulses from SW are slow*/), + .clk_b(bus_clk), .pulse_b(clear_tx_seqnum_bclk) + ); + + // ///////////////////////////////////////////////////////// + // Input side handling, chdr_deframer + wire [127:0] s_axis_data_tuser_int, m_axis_data_tuser_int; + wire s_axis_data_tlast_int, m_axis_data_tlast_int; + reg [15:0] m_axis_pkt_len_reg = 16'd8; + reg sof_in = 1'b1; + // Header saved for SIMPLE_MODE: the input DST SID [79:64] is placed in the SRC SID field [95:80] + // and next_dst is placed in the DST SID field [79:64]; all other fields are kept. + wire [127:0] header_fifo_i_tdata = {m_axis_data_tuser[127:96],m_axis_data_tuser[79:64],next_dst,m_axis_data_tuser[63:0]}; + wire header_fifo_i_tvalid = sof_in & m_axis_data_tvalid & m_axis_data_tready; + + chdr_deframer_2clk #(.WIDTH(WIDTH)) chdr_deframer ( + .samp_clk(clk), .samp_rst(reset | clear_tx_seqnum), .pkt_clk(bus_clk), .pkt_rst(bus_rst | clear_tx_seqnum_bclk), + .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata(m_axis_data_tdata), .o_tuser(m_axis_data_tuser_int), .o_tlast(m_axis_data_tlast_int), .o_tvalid(m_axis_data_tvalid), .o_tready(m_axis_data_tready) + ); + + // Pass the deframed header through to the user. When RESIZE_INPUT_PACKET=1 only the 16-bit + // length field [111:96] is replaced: payload length plus 8 header bytes, or 16 when bit 125 + // (has time) is set. The SID fields [95:64] must not be touched. + assign m_axis_data_tuser[127:112] = m_axis_data_tuser_int[127:112]; + assign m_axis_data_tuser[111:96] = RESIZE_INPUT_PACKET ? (m_axis_data_tuser_int[125] ?
m_axis_pkt_len_reg+16 : m_axis_pkt_len_reg+8) : m_axis_data_tuser_int[111:96]; + assign m_axis_data_tuser[95:0] = m_axis_data_tuser_int[95:0]; + + // Only store header once per packet + always @(posedge clk) + if(reset | clear_tx_seqnum) + sof_in <= 1'b1; + else + if(m_axis_data_tvalid & m_axis_data_tready) + if(m_axis_data_tlast) + sof_in <= 1'b1; + else + sof_in <= 1'b0; + + // SIMPLE MODE: Store input packet header to reuse as output packet header. + generate + if(SIMPLE_MODE) + begin + // FIFO + axi_fifo #(.WIDTH(128), .SIZE(5)) header_fifo + (.clk(clk), .reset(reset), .clear(clear_tx_seqnum), + .i_tdata(header_fifo_i_tdata), + .i_tvalid(header_fifo_i_tvalid), .i_tready(), + .o_tdata(s_axis_data_tuser_int), .o_tvalid(), .o_tready(s_axis_data_tlast_int & s_axis_data_tvalid & s_axis_data_tready), + .occupied(), .space()); + end else begin + assign s_axis_data_tuser_int = s_axis_data_tuser; + end + endgenerate + + // RESIZE INPUT PACKET + // Size input packets based on m_axis_pkt_len_tdata (RESIZE_INPUT_PACKET=1) or based on i_tlast (RESIZE_INPUT_PACKET=0) + generate + if (RESIZE_INPUT_PACKET) begin + reg m_axis_data_tlast_reg; + reg [15:0] m_axis_pkt_cnt; + always @(posedge clk) begin + if (reset | clear_tx_seqnum) begin + m_axis_data_tlast_reg <= 1'b0; + m_axis_pkt_cnt <= (WIDTH/8); // Number of bytes in packet + m_axis_pkt_len_reg <= 2*(WIDTH/8); // Double size by default + end else begin + // Only update packet length at the beginning of a new packet + if (m_axis_pkt_len_tvalid & m_axis_pkt_len_tready) begin + m_axis_pkt_len_reg <= m_axis_pkt_len_tdata; + end + if (m_axis_data_tvalid & m_axis_data_tready) begin + if (m_axis_pkt_cnt >= m_axis_pkt_len_reg) begin + m_axis_pkt_cnt <= (WIDTH/8); + end else begin + m_axis_pkt_cnt <= m_axis_pkt_cnt + (WIDTH/8); + end + if (m_axis_pkt_cnt >= m_axis_pkt_len_reg-(WIDTH/8)) begin + m_axis_data_tlast_reg <= 1'b1; + end else begin + m_axis_data_tlast_reg <= 1'b0; + end + end + end + end + assign m_axis_data_tlast = m_axis_data_tlast_reg; + assign
m_axis_pkt_len_tready = sof_in; + end else begin + assign m_axis_data_tlast = m_axis_data_tlast_int; + assign m_axis_pkt_len_tready = 1'b0; + end + endgenerate + + // RESIZE OUTPUT PACKET + // Size output packets based on either s_axis_data_tlast (RESIZE_OUTPUT_PACKET=0) or packet length from user header (s_axis_data_tuser) (RESIZE_OUTPUT_PACKET=1) + // TODO: There could be a race condition on s_axis_data_tuser_int when + // receiving very short packets, but latency in chdr_deframer + // prevents this from occurring. Need to fix so it cannot + // occur by design. + generate + if (RESIZE_OUTPUT_PACKET) begin + reg [15:0] s_axis_pkt_cnt; + reg [15:0] s_axis_pkt_len; + always @(posedge clk) begin + if (reset | clear_tx_seqnum) begin + s_axis_pkt_cnt <= (WIDTH/8); + s_axis_pkt_len <= 0; + end else begin + // Remove header + s_axis_pkt_len <= s_axis_data_tuser_int[125] ? s_axis_data_tuser_int[111:96]-16 : s_axis_data_tuser_int[111:96]-8; + if (s_axis_data_tvalid & s_axis_data_tready) begin + if ((s_axis_pkt_cnt >= s_axis_pkt_len) | s_axis_data_tlast) begin + s_axis_pkt_cnt <= (WIDTH/8); + end else begin + s_axis_pkt_cnt <= s_axis_pkt_cnt + (WIDTH/8); + end + end + end + end + assign s_axis_data_tlast_int = (s_axis_pkt_cnt >= s_axis_pkt_len) | s_axis_data_tlast; + end else begin + // chdr_framer will automatically fill in the packet length based on user provided tlast + assign s_axis_data_tlast_int = s_axis_data_tlast; + end + endgenerate + + // ///////////////////////////////////////////////////////// + // Output side handling, chdr_framer + chdr_framer_2clk #(.SIZE(MTU), .WIDTH(WIDTH), .USE_SEQ_NUM(USE_SEQ_NUM)) chdr_framer ( + .samp_clk(clk), .samp_rst(reset | clear_tx_seqnum), .pkt_clk(bus_clk), .pkt_rst(bus_rst | clear_tx_seqnum_bclk), + .i_tdata(s_axis_data_tdata), .i_tuser(s_axis_data_tuser_int), .i_tlast(s_axis_data_tlast_int), .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready) + ); + + //
///////////////////////////////////////////////////////// + // Control bus handling + // FIXME we could put inline control here... + // Generate additional AXI stream interfaces for configuration. + // FIXME need to make sure we don't overrun this if core can backpressure us + // Write to SR_AXI_CONFIG_BASE+2*(CONFIG BUS #) asserts tvalid, SR_AXI_CONFIG_BASE+2*(CONFIG BUS #)+1 asserts tvalid & tlast + genvar k; + generate + for (k = 0; k < NUM_AXI_CONFIG_BUS; k = k + 1) begin + axi_fifo #(.WIDTH(33), .SIZE(CONFIG_BUS_FIFO_DEPTH)) config_stream + (.clk(clk), .reset(reset), .clear(clear_tx_seqnum), + .i_tdata({(set_addr == (SR_AXI_CONFIG_BASE+2*k+1)),set_data}), + .i_tvalid(set_stb & ((set_addr == (SR_AXI_CONFIG_BASE+2*k))|(set_addr == (SR_AXI_CONFIG_BASE+2*k+1)))), + .i_tready(), + .o_tdata({m_axis_config_tlast[k],m_axis_config_tdata[32*k+31:32*k]}), + .o_tvalid(m_axis_config_tvalid[k]), + .o_tready(m_axis_config_tready[k]), + .occupied(), .space()); + end + endgenerate + +endmodule // axi_wrapper diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile new file mode 100644 index 000000000..acee50882 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile @@ -0,0 +1,45 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include
$(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_BLOCK_AXI_RAM_FIFO_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +SIM_TOP = rfnoc_block_axi_ram_fifo_all_tb + +SIM_SRCS = \ +$(abspath sim_axi_ram.sv) \ +$(abspath rfnoc_block_axi_ram_fifo_tb.sv) \ +$(abspath rfnoc_block_axi_ram_fifo_all_tb.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile.srcs new file mode 100644 index 000000000..9faa27321 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/Makefile.srcs @@ -0,0 +1,18 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Utility Sources +################################################## +RFNOC_BLOCK_AXI_RAM_FIFO_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/, \ +noc_shell_axi_ram_fifo.v \ +axi_ram_fifo_regs.vh \ +axi_ram_fifo_regs.v \ +axi_ram_fifo_bist.v \ +axi_ram_fifo_bist_regs.v \ +axi_ram_fifo.v \ +rfnoc_block_axi_ram_fifo.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo.v new file mode 100644 index 000000000..5dd5f5ec4 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo.v @@ -0,0 +1,1228 @@ +// +// Copyright 2019 
Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axi_ram_fifo +// +// Description: +// +// Implements a FIFO using a memory-mapped AXI interface as storage. This can +// be connected to any memory-mapped AXI4 bus interface, such as DRAM, SRAM, +// or AXI interconnect IP. The input and output interfaces to the FIFO are +// AXI-Stream. +// +// The logic is designed to buffer up multiple words so that writes and reads +// can be implemented as efficient burst transactions on the AXI4 bus. This +// core never crosses 4 KiB boundaries, per AXI4 rules (a burst must not +// cross a 4 KiB boundary). +// +// The FIFO must be at least 4 KiB in size so that the 4 KiB page boundary +// protection also handles/prevents the FIFO wrap corner case. +// +// Parameters: +// +// MEM_ADDR_W : The width of the byte address to use for the AXI4 memory +// mapped interface. +// +// MEM_DATA_W : The width of the data port to use for the AXI4 memory +// mapped interface. +// +// KEEP_W : Width of tkeep on the AXI-Stream interface. Set to 1 if +// tkeep is not used. +// +// FIFO_ADDR_BASE : Default base address to use for this FIFO. +// +// FIFO_ADDR_MASK : Default byte address mask, which defines which memory +// address bits can be used for the FIFO. For example, a 64 +// KiB memory region, or 2^16 bytes, would require the mask +// 0xFFFF (16 ones). In other words, the mask should be the +// size of the memory region minus 1. +// +// BURST_TIMEOUT : Default number of memory clock cycles to wait for new +// data before performing a short, sub-optimal burst. One +// value per FIFO. +// +// BIST : If true, BIST logic will be included in the build. +// +// CLK_RATE : Frequency of clk in Hz +// +// IN_FIFO_SIZE : The input FIFO size will be 2**IN_FIFO_SIZE in depth. +// +// OUT_FIFO_SIZE : The output FIFO size will be 2**OUT_FIFO_SIZE in depth.
+// This must be at least 9 so that there is enough space to +// accept a full AXI4 burst and then accept additional +// bursts while the FIFO is reading out. +// + +module axi_ram_fifo #( + parameter MEM_ADDR_W = 32, + parameter MEM_DATA_W = 64, + parameter KEEP_W = 1, + parameter [MEM_ADDR_W-1:0] FIFO_ADDR_BASE = 'h0, + parameter [MEM_ADDR_W-1:0] FIFO_ADDR_MASK = 'h00FFFFFF, + parameter BURST_TIMEOUT = 256, + parameter BIST = 1, + parameter CLK_RATE = 200e6, + parameter IN_FIFO_SIZE = 11, + parameter OUT_FIFO_SIZE = 10 +) ( + + input clk, + input rst, + + //-------------------------------------------------------------------------- + // CTRL Port + //-------------------------------------------------------------------------- + + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [31:0] s_ctrlport_req_data, + output wire s_ctrlport_resp_ack, + output wire [31:0] s_ctrlport_resp_data, + + //-------------------------------------------------------------------------- + // AXI-Stream Interface + //-------------------------------------------------------------------------- + + // FIFO Input + input wire [MEM_DATA_W-1:0] s_tdata, + input wire [ KEEP_W-1:0] s_tkeep, + input wire s_tlast, + input wire s_tvalid, + output wire s_tready, + + // FIFO Output + output wire [MEM_DATA_W-1:0] m_tdata, + output wire [ KEEP_W-1:0] m_tkeep, + output wire m_tlast, + output wire m_tvalid, + input wire m_tready, + + //-------------------------------------------------------------------------- + // AXI4 Memory Interface + //-------------------------------------------------------------------------- + + // AXI Write Address Channel + output wire [ 0:0] m_axi_awid, // Write address ID. This signal is the identification tag for the write address signals. + output wire [ MEM_ADDR_W-1:0] m_axi_awaddr, // Write address. The write address gives the address of the first transfer in a write burst. 
+ output wire [ 7:0] m_axi_awlen, // Burst length. The burst length gives the exact number of transfers in a burst. + output wire [ 2:0] m_axi_awsize, // Burst size. This signal indicates the size of each transfer in the burst. + output wire [ 1:0] m_axi_awburst, // Burst type. The burst type and the size information, determine how the address is calculated. + output wire [ 0:0] m_axi_awlock, // Lock type. Provides additional information about the atomic characteristics of the transfer. + output wire [ 3:0] m_axi_awcache, // Memory type. This signal indicates how transactions are required to progress. + output wire [ 2:0] m_axi_awprot, // Protection type. This signal indicates the privilege and security level of the transaction. + output wire [ 3:0] m_axi_awqos, // Quality of Service, QoS. The QoS identifier sent for each write transaction. + output wire [ 3:0] m_axi_awregion, // Region identifier. Permits a single physical interface on a slave to be re-used. + output wire [ 0:0] m_axi_awuser, // User signal. Optional User-defined signal in the write address channel. + output wire m_axi_awvalid, // Write address valid. This signal indicates that the channel is signaling valid write addr. + input wire m_axi_awready, // Write address ready. This signal indicates that the slave is ready to accept an address. + // + // AXI Write Data Channel + output wire [ MEM_DATA_W-1:0] m_axi_wdata, // Write data + output wire [MEM_DATA_W/8-1:0] m_axi_wstrb, // Write strobes. This signal indicates which byte lanes hold valid data. + output wire m_axi_wlast, // Write last. This signal indicates the last transfer in a write burst. + output wire [ 0:0] m_axi_wuser, // User signal. Optional User-defined signal in the write data channel. + output wire m_axi_wvalid, // Write valid. This signal indicates that valid write data and strobes are available. + input wire m_axi_wready, // Write ready. This signal indicates that the slave can accept the write data. 
+ // + // AXI Write Response Channel + input wire [ 0:0] m_axi_bid, // Response ID tag. This signal is the ID tag of the write response. + input wire [ 1:0] m_axi_bresp, // Write response. This signal indicates the status of the write transaction. + input wire [ 0:0] m_axi_buser, // User signal. Optional User-defined signal in the write response channel. + input wire m_axi_bvalid, // Write response valid. This signal indicates that the channel is signaling a valid response. + output wire m_axi_bready, // Response ready. This signal indicates that the master can accept a write response. + // + // AXI Read Address Channel + output wire [ 0:0] m_axi_arid, // Read address ID. This signal is the identification tag for the read address group of signals. + output wire [ MEM_ADDR_W-1:0] m_axi_araddr, // Read address. The read address gives the address of the first transfer in a read burst. + output wire [ 7:0] m_axi_arlen, // Burst length. This signal indicates the exact number of transfers in a burst. + output wire [ 2:0] m_axi_arsize, // Burst size. This signal indicates the size of each transfer in the burst. + output wire [ 1:0] m_axi_arburst, // Burst type. The burst type and the size information determine how the address for each transfer. + output wire [ 0:0] m_axi_arlock, // Lock type. This signal provides additional information about the atomic characteristics. + output wire [ 3:0] m_axi_arcache, // Memory type. This signal indicates how transactions are required to progress. + output wire [ 2:0] m_axi_arprot, // Protection type. This signal indicates the privilege and security level of the transaction. + output wire [ 3:0] m_axi_arqos, // Quality of Service, QoS. QoS identifier sent for each read transaction. + output wire [ 3:0] m_axi_arregion, // Region identifier. Permits a single physical interface on a slave to be re-used. + output wire [ 0:0] m_axi_aruser, // User signal. Optional User-defined signal in the read address channel. 
+ output wire m_axi_arvalid, // Read address valid. This signal indicates that the channel is signaling valid read addr. + input wire m_axi_arready, // Read address ready. This signal indicates that the slave is ready to accept an address. + // + // AXI Read Data Channel + input wire [ 0:0] m_axi_rid, // Read ID tag. This signal is the identification tag for the read data group of signals. + input wire [ MEM_DATA_W-1:0] m_axi_rdata, // Read data. + input wire [ 1:0] m_axi_rresp, // Read response. This signal indicates the status of the read transfer. + input wire m_axi_rlast, // Read last. This signal indicates the last transfer in a read burst. + input wire [ 0:0] m_axi_ruser, // User signal. Optional User-defined signal in the read data channel. + input wire m_axi_rvalid, // Read valid. This signal indicates that the channel is signaling the required read data. + output wire m_axi_rready // Read ready. This signal indicates that the master can accept the read data and response. +); + + `include "axi_ram_fifo_regs.vh" + + + //--------------------------------------------------------------------------- + // Parameter Checking + //--------------------------------------------------------------------------- + + // The input FIFO size must be at least 9 so that there is enough space to + // hold an entire burst and be able to accept new data while that burst is + // waiting to be read out. + if (IN_FIFO_SIZE < 9) begin + IN_FIFO_SIZE_must_be_at_least_9(); + end + + // The output FIFO size must be at least 9 so that there is enough space to + // accept a full AXI4 burst (255 words) and then accept additional bursts + // while the FIFO is waiting to be read out.
+ if (OUT_FIFO_SIZE < 9) begin + OUT_FIFO_SIZE_must_be_at_least_9(); + end + + // The memory must be at least as big as the default FIFO mask + if (2.0**MEM_ADDR_W < FIFO_ADDR_MASK+1) begin + MEM_ADDR_W_must_be_larger_than_size_indicated_by_FIFO_ADDR_MASK(); + end + + // The FIFO memory must be large enough for a full AXI4 burst + 64 words + // that's allocated to allow for read/write reordering. + // TODO: Is the 64-word extra needed? Why 64? + // + // Min size allowed for memory region in bytes + localparam FIFO_MIN_RAM_SIZE = (256+64) * (MEM_DATA_W/8); + // + // Equivalent mask + localparam FIFO_ADDR_MASK_MIN = 2**($clog2(FIFO_MIN_RAM_SIZE))-1; + // + // Check the parameter + if (FIFO_ADDR_MASK < FIFO_ADDR_MASK_MIN) begin + FIFO_ADDR_MASK_must_be_at_least_256_plus_64_words(); + end + + // The 4 KiB page-crossing detection logic assumes that the memory is more + // than 4 kiB in size. This could be fixed in the code, but 8 KiB is already + // pretty small for an external memory. + if (2.0**MEM_ADDR_W < 8192) begin + MEM_ADDR_W_must_be_at_least_8_KiB(); + end + + // Make sure the default burst timeout is not too big for the register + if ($clog2(BURST_TIMEOUT+1) > REG_TIMEOUT_W) begin + BURST_TIMEOUT_must_not_exceed_the_range_of_REG_TIMEOUT_W(); + end + + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Width of the timeout counter + localparam TIMEOUT_W = REG_TIMEOUT_W; + + // Address widths. Each memory byte address can be broken up into the word + // address portion (the upper bits) and the byte address portion (lower + // bits). Although the memory is byte addressable, we only read/write whole + // words. 
+ localparam BYTE_ADDR_W = $clog2(MEM_DATA_W/8); + localparam WORD_ADDR_W = MEM_ADDR_W - BYTE_ADDR_W; + + + //--------------------------------------------------------------------------- + // Functions + //--------------------------------------------------------------------------- + + function automatic integer min(input integer a, b); + min = a < b ? a : b; + endfunction + + + //--------------------------------------------------------------------------- + // Signal Declarations + //--------------------------------------------------------------------------- + + // Track RAM FIFO state, in number of words + reg [WORD_ADDR_W:0] space; + reg [WORD_ADDR_W:0] occupied; + reg [WORD_ADDR_W:0] occupied_minus_one; // Maintain a -1 version to break critical timing paths + + reg [31:0] out_pkt_count = 0; + + // + // Input Side + // + reg [MEM_DATA_W-1:0] s_tdata_fifo; + reg s_tvalid_fifo; + wire s_tready_fifo; + + wire [MEM_DATA_W-1:0] m_tdata_fifo; + wire m_tvalid_fifo; + reg m_tready_fifo; + + wire [MEM_DATA_W-1:0] s_tdata_i1; + wire [ KEEP_W-1:0] s_tkeep_i1; + wire s_tvalid_i1, s_tready_i1, s_tlast_i1; + + wire [MEM_DATA_W-1:0] s_tdata_i2; + wire s_tvalid_i2, s_tready_i2; + + wire [MEM_DATA_W-1:0] s_tdata_i3; + wire s_tvalid_i3; + reg s_tready_i3; + + wire [MEM_DATA_W-1:0] s_tdata_input; + wire s_tvalid_input, s_tready_input; + + wire [15:0] space_input, occupied_input; + reg [15:0] space_input_reg; + reg suppress_reads; + + // + // Output Side + // + wire [MEM_DATA_W-1:0] m_tdata_output; + wire m_tvalid_output, m_tready_output; + + reg [MEM_DATA_W-1:0] m_tdata_i0; + reg m_tvalid_i0; + wire m_tready_i0; + + wire [MEM_DATA_W-1:0] m_tdata_i1; + wire m_tvalid_i1, m_tready_i1; + + wire [MEM_DATA_W-1:0] m_tdata_i2; + wire m_tvalid_i2, m_tready_i2; + + wire [MEM_DATA_W-1:0] m_tdata_i3; + wire [ KEEP_W-1:0] m_tkeep_i3; + wire m_tvalid_i3, m_tready_i3, m_tlast_i3; + + wire [15:0] space_output; + + + //--------------------------------------------------------------------------- + // 
Registers + //--------------------------------------------------------------------------- + + wire [ 15:0] set_suppress_threshold; + wire [ TIMEOUT_W-1:0] set_timeout; + wire set_clear = 1'b0; // Clear no longer needed in RFNoC + wire [MEM_ADDR_W-1:0] set_fifo_addr_base; + wire [MEM_ADDR_W-1:0] set_fifo_addr_mask; + + wire s_ctrlport_resp_ack_regs; + wire [31:0] s_ctrlport_resp_data_regs; + + axi_ram_fifo_regs #( + .MEM_ADDR_W (MEM_ADDR_W), + .MEM_DATA_W (MEM_DATA_W), + .FIFO_ADDR_BASE (FIFO_ADDR_BASE), + .FIFO_ADDR_MASK (FIFO_ADDR_MASK), + .FIFO_ADDR_MASK_MIN (FIFO_ADDR_MASK_MIN), + .BIST (BIST), + .IN_FIFO_SIZE (IN_FIFO_SIZE), + .WORD_ADDR_W (WORD_ADDR_W), + .BURST_TIMEOUT (BURST_TIMEOUT), + .TIMEOUT_W (TIMEOUT_W) + ) axi_ram_fifo_regs_i ( + .clk (clk), + .rst (rst), + .s_ctrlport_req_wr (s_ctrlport_req_wr), + .s_ctrlport_req_rd (s_ctrlport_req_rd), + .s_ctrlport_req_addr (s_ctrlport_req_addr), + .s_ctrlport_req_data (s_ctrlport_req_data), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack_regs), + .s_ctrlport_resp_data (s_ctrlport_resp_data_regs), + .rb_out_pkt_count (out_pkt_count), + .rb_occupied (occupied), + .set_suppress_threshold (set_suppress_threshold), + .set_timeout (set_timeout), + .set_fifo_addr_base (set_fifo_addr_base), + .set_fifo_addr_mask (set_fifo_addr_mask) + ); + + //synthesis translate_off + // Check the address mask at run-time + always @(set_fifo_addr_mask) begin + if (set_fifo_addr_mask < FIFO_ADDR_MASK_MIN) begin + $display("ERROR: set_fifo_addr_mask was set too small!"); + end + if (2**$clog2(set_fifo_addr_mask)-1 != set_fifo_addr_mask) begin + $display("ERROR: set_fifo_addr_mask must be a power of 2, minus 1!"); + end + end + //synthesis translate_on + + + //--------------------------------------------------------------------------- + // BIST for production testing + //--------------------------------------------------------------------------- + + if (BIST) begin : gen_bist + wire s_ctrlport_resp_ack_bist; + wire [ 31:0] 
s_ctrlport_resp_data_bist; + wire [MEM_DATA_W-1:0] m_tdata_bist; + wire m_tvalid_bist; + reg m_tready_bist; + reg [MEM_DATA_W-1:0] s_tdata_bist; + reg s_tvalid_bist; + wire s_tready_bist; + + wire bist_running; + + axi_ram_fifo_bist #( + .DATA_W (MEM_DATA_W), + .COUNT_W (48), + .CLK_RATE (CLK_RATE), + .RAND (1) + ) axi_ram_fifo_bist_i ( + .clk (clk), + .rst (rst), + .s_ctrlport_req_wr (s_ctrlport_req_wr), + .s_ctrlport_req_rd (s_ctrlport_req_rd), + .s_ctrlport_req_addr (s_ctrlport_req_addr), + .s_ctrlport_req_data (s_ctrlport_req_data), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack_bist), + .s_ctrlport_resp_data (s_ctrlport_resp_data_bist), + .m_tdata (m_tdata_bist), + .m_tvalid (m_tvalid_bist), + .m_tready (m_tready_bist), + .s_tdata (s_tdata_bist), + .s_tvalid (s_tvalid_bist), + .s_tready (s_tready_bist), + .running (bist_running) + ); + + // Use a multiplexer to decide where the data flows, using the BIST when + // ever the BIST is running. + always @(*) begin + if (bist_running) begin + // Insert the BIST logic + s_tdata_fifo = m_tdata_bist; + s_tvalid_fifo = m_tvalid_bist; + m_tready_bist = s_tready_fifo; + // + s_tdata_bist = m_tdata_fifo; + s_tvalid_bist = m_tvalid_fifo; + m_tready_fifo = s_tready_bist; + + // Disable output-logic + s_tready_i3 = 0; + m_tdata_i0 = m_tdata_fifo; + m_tvalid_i0 = 0; + end else begin + // Disable BIST + m_tready_bist = 0; + s_tdata_bist = m_tdata_fifo; + s_tvalid_bist = 0; + + // Bypass BIST + s_tdata_fifo = s_tdata_i3; + s_tvalid_fifo = s_tvalid_i3; + s_tready_i3 = s_tready_fifo; + // + m_tdata_i0 = m_tdata_fifo; + m_tvalid_i0 = m_tvalid_fifo; + m_tready_fifo = m_tready_i0; + end + end + + // Combine register responses + ctrlport_resp_combine #( + .NUM_SLAVES (2) + ) ctrlport_resp_combine_i ( + .ctrlport_clk (clk), + .ctrlport_rst (rst), + .m_ctrlport_resp_ack ({s_ctrlport_resp_ack_bist, s_ctrlport_resp_ack_regs}), + .m_ctrlport_resp_status ({2{2'b00}}), + .m_ctrlport_resp_data ({s_ctrlport_resp_data_bist, 
s_ctrlport_resp_data_regs}), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (s_ctrlport_resp_data) + ); + + end else begin : gen_no_bist + assign s_ctrlport_resp_ack = s_ctrlport_resp_ack_regs; + assign s_ctrlport_resp_data = s_ctrlport_resp_data_regs; + always @(*) begin + // Bypass the BIST logic + s_tdata_fifo = s_tdata_i3; + s_tvalid_fifo = s_tvalid_i3; + s_tready_i3 = s_tready_fifo; + // + m_tdata_i0 = m_tdata_fifo; + m_tvalid_i0 = m_tvalid_fifo; + m_tready_fifo = m_tready_i0; + // + end + end + + + //--------------------------------------------------------------------------- + // Input Handling and Buffer + //--------------------------------------------------------------------------- + // + // This block embeds TLAST into the data stream using an escape code and + // buffers up input data. + // + //--------------------------------------------------------------------------- + + // Insert flops to improve timing + axi_fifo_flop2 #( + .WIDTH (MEM_DATA_W+1+KEEP_W) + ) input_pipe_i0 ( + .clk (clk), + .reset (rst), + .clear (set_clear), + // + .i_tdata ({s_tkeep, s_tlast, s_tdata}), + .i_tvalid (s_tvalid), + .i_tready (s_tready), + // + .o_tdata ({s_tkeep_i1, s_tlast_i1, s_tdata_i1}), + .o_tvalid (s_tvalid_i1), + .o_tready (s_tready_i1), + // + .space (), + .occupied () + ); + + axi_embed_tlast_tkeep #( + .DATA_W (MEM_DATA_W), + .KEEP_W (KEEP_W) + ) axi_embed_tlast_tkeep_i ( + .clk (clk), + .rst (rst | set_clear), + // + .i_tdata (s_tdata_i1), + .i_tkeep (s_tkeep_i1), + .i_tlast (s_tlast_i1), + .i_tvalid (s_tvalid_i1), + .i_tready (s_tready_i1), + // + .o_tdata (s_tdata_i2), + .o_tvalid (s_tvalid_i2), + .o_tready (s_tready_i2) + ); + + // Insert flops to improve timing + axi_fifo_flop2 #( + .WIDTH (MEM_DATA_W) + ) input_pipe_i1 ( + .clk (clk), + .reset (rst), + .clear (set_clear), + // + .i_tdata (s_tdata_i2), + .i_tvalid (s_tvalid_i2), + .i_tready (s_tready_i2), + // + .o_tdata (s_tdata_i3), + .o_tvalid 
(s_tvalid_i3), + .o_tready (s_tready_i3), + // + .space (), + .occupied () + ); + + axi_fifo #( + .WIDTH (MEM_DATA_W), + .SIZE (IN_FIFO_SIZE) + ) input_fifo ( + .clk (clk), + .reset (rst), + .clear (set_clear), + // + .i_tdata (s_tdata_fifo), + .i_tvalid (s_tvalid_fifo), + .i_tready (s_tready_fifo), + // + .o_tdata (s_tdata_input), + .o_tvalid (s_tvalid_input), + .o_tready (s_tready_input), + // + .space (space_input), + .occupied (occupied_input) + ); + + + //--------------------------------------------------------------------------- + // Input (Memory Write) Logic + //--------------------------------------------------------------------------- + // + // The input state machine waits for enough entries in input FIFO to trigger + // RAM write burst. A timeout can also trigger a burst so that smaller chunks + // of data are not left to rot in the input FIFO. Also, if enough data is + // present in the input FIFO to complete a burst up to the edge of a 4 KiB + // page then we do a burst up to the 4 KiB boundary. + // + //--------------------------------------------------------------------------- + + // + // Input side declarations + // + localparam [2:0] INPUT_IDLE = 0; + localparam [2:0] INPUT1 = 1; + localparam [2:0] INPUT2 = 2; + localparam [2:0] INPUT3 = 3; + localparam [2:0] INPUT4 = 4; + localparam [2:0] INPUT5 = 5; + localparam [2:0] INPUT6 = 6; + + wire write_ctrl_ready; + + reg [ 2:0] input_state; + reg input_timeout_triggered; + reg input_timeout_reset; + reg [ TIMEOUT_W-1:0] input_timeout_count; + reg [MEM_ADDR_W-1:0] write_addr; + reg write_ctrl_valid; + reg [ 7:0] write_count = 0; + reg [ 8:0] write_count_plus_one = 1; // Maintain a +1 version to break critical timing paths + reg update_write; + + reg [WORD_ADDR_W-1:0] input_page_boundary; + + // + // Input timeout counter. Timeout count only increments when there is some + // data waiting to be written to the RAM. 
//
  // Timeout behaviour implemented below: the count and the triggered flag are
  // cleared on reset/clear or when the state machine pulses
  // input_timeout_reset. The flag is set once the count equals set_timeout.
  // Otherwise the count advances only while the state machine is in
  // INPUT_IDLE and the input FIFO is not empty.
  always @(posedge clk) begin
    if (rst | set_clear) begin
      input_timeout_count <= 0;
      input_timeout_triggered <= 0;
    end else if (input_timeout_reset) begin
      input_timeout_count <= 0;
      input_timeout_triggered <= 0;
    end else if (input_timeout_count == set_timeout) begin
      input_timeout_triggered <= 1;
    end else if (input_state == INPUT_IDLE) begin
      input_timeout_count <= input_timeout_count + ((occupied_input != 0) ? 1 : 0);
    end
  end

  //
  // Input State Machine
  //
  // Sequence of one write burst:
  //   INPUT_IDLE -> INPUT1 (full 256-word trigger) or INPUT2 (timeout trigger)
  //              -> INPUT3 (only if the request was not accepted immediately)
  //              -> INPUT4 -> INPUT5 -> INPUT6 -> INPUT_IDLE
  //
  always @(posedge clk)
    if (rst | set_clear) begin
      input_state <= INPUT_IDLE;
      // Start writing at the base address, keeping only the bits outside the
      // FIFO address mask.
      write_addr <= set_fifo_addr_base & ~set_fifo_addr_mask;
      input_timeout_reset <= 1'b0;
      write_ctrl_valid <= 1'b0;
      write_count <= 8'd0;
      write_count_plus_one <= 9'd1;
      update_write <= 1'b0;
    end else begin
      case (input_state)
        //
        // INPUT_IDLE.
        // To start an input transfer to DRAM need:
        // 1) Space in the RAM
        // and either
        // 2) 256 entries in the input FIFO
        // or
        // 3) Timeout occurred while waiting for more data, which can only happen
        //    if there's at least one word in the input FIFO.
        //
        INPUT_IDLE: begin
          write_ctrl_valid <= 1'b0;
          update_write <= 1'b0;
          input_timeout_reset <= 1'b0;
          if (space[WORD_ADDR_W:8] != 'd0) begin // (space > 255): 256 or more slots in the RAM
            if (occupied_input[15:8] != 'd0) begin // (occupied_input > 255): 256 or more words in input FIFO
              input_state <= INPUT1;
              input_timeout_reset <= 1'b1;

              // Calculate the number of entries remaining until next 4 KiB page
              // boundary is crossed, minus 1. The units of calculation are
              // words. The address is always word aligned.
              input_page_boundary <= { write_addr[MEM_ADDR_W-1:12], {12-BYTE_ADDR_W{1'b1}} } -
                                     write_addr[MEM_ADDR_W-1 : BYTE_ADDR_W];
            end else if (input_timeout_triggered) begin // input FIFO timeout waiting for new data.
              input_state <= INPUT2;
              input_timeout_reset <= 1'b1;
              // Calculate the number of entries remaining until next 4 KiB page
              // boundary is crossed, minus 1. The units of calculation are
              // words. The address is always word-aligned.
              input_page_boundary <= { write_addr[MEM_ADDR_W-1:12], {12-BYTE_ADDR_W{1'b1}} } -
                                     write_addr[MEM_ADDR_W-1 : BYTE_ADDR_W];
            end
          end
        end
        //
        // INPUT1.
        // Caused by input FIFO reaching 256 entries.
        // Request write burst of lesser of:
        // 1) Entries until page boundary crossed
        // 2) 256
        //
        INPUT1: begin
          // Replicated write logic to break a read timing critical path for
          // write_count. write_count holds the burst length minus 1 and
          // write_count_plus_one holds the burst length itself.
          write_count <= input_page_boundary[min(12, WORD_ADDR_W)-1:8] == 0 ?
                         input_page_boundary[7:0] :
                         255;
          write_count_plus_one <= input_page_boundary[min(12, WORD_ADDR_W)-1:8] == 0 ?
                                  input_page_boundary[7:0] + 1 :
                                  256;
          write_ctrl_valid <= 1'b1;
          if (write_ctrl_ready)
            input_state <= INPUT4; // Preemptive ACK
          else
            input_state <= INPUT3; // Wait for ACK
        end
        //
        // INPUT2.
        // Caused by timeout of input FIFO (occupied_input must now be 256 or
        // less since it was 255 or less in the INPUT_IDLE state; otherwise we
        // would have gone to INPUT1). Request write burst of lesser of:
        // 1) Entries until page boundary crossed
        // 2) Entries in input FIFO
        //
        INPUT2: begin
          // Replicated write logic to break a read timing critical path for
          // write_count.
          write_count <= input_page_boundary < occupied_input[8:0] - 1 ?
                         input_page_boundary[7:0] :
                         occupied_input[8:0] - 1; // Max result of 255
          write_count_plus_one <= input_page_boundary < occupied_input[8:0] - 1 ?
                                  input_page_boundary[7:0] + 1 :
                                  occupied_input[8:0];
          write_ctrl_valid <= 1'b1;
          if (write_ctrl_ready)
            input_state <= INPUT4; // Preemptive ACK
          else
            input_state <= INPUT3; // Wait for ACK
        end
        //
        // INPUT3.
        // Wait in this state for AXI4 DMA engine to accept transaction.
        //
        INPUT3: begin
          if (write_ctrl_ready) begin
            write_ctrl_valid <= 1'b0;
            input_state <= INPUT4; // ACK
          end else begin
            write_ctrl_valid <= 1'b1;
            input_state <= INPUT3; // Wait for ACK
          end
        end
        //
        // INPUT4.
        // Wait here until write_ctrl_ready deasserts. This is important as the
        // next time it asserts we know that a write response was received.
        INPUT4: begin
          write_ctrl_valid <= 1'b0;
          if (!write_ctrl_ready)
            input_state <= INPUT5; // Move on
          else
            input_state <= INPUT4; // Wait for deassert
        end
        //
        // INPUT5.
        // Transaction has been accepted by AXI4 DMA engine. Now we wait for the
        // re-assertion of write_ctrl_ready which signals that the AXI4 DMA
        // engine has received a response for the whole write transaction and we
        // assume that this means it is committed to DRAM. We are now free to
        // update write_addr pointer and go back to idle state.
        //
        INPUT5: begin
          write_ctrl_valid <= 1'b0;
          if (write_ctrl_ready) begin
            // Advance by the burst length in bytes. Only the bits selected by
            // set_fifo_addr_mask take the new value (so the pointer wraps
            // inside the FIFO region); the remaining bits are preserved.
            write_addr <= ((write_addr + (write_count_plus_one << $clog2(MEM_DATA_W/8))) & set_fifo_addr_mask) | (write_addr & ~set_fifo_addr_mask);
            input_state <= INPUT6;
            update_write <= 1'b1; // One-cycle strobe; cleared in INPUT6
          end else begin
            input_state <= INPUT5;
          end
        end
        //
        // INPUT6:
        // Need to let space update before looking if there's more to do.
        //
        INPUT6: begin
          input_state <= INPUT_IDLE;
          update_write <= 1'b0;
        end

        default:
          input_state <= INPUT_IDLE;
      endcase // case(input_state)
    end


  //---------------------------------------------------------------------------
  // Read Suppression Logic
  //---------------------------------------------------------------------------
  //
  // Monitor the free space in the input FIFO (space_input) to deduce when the
  // DRAM FIFO is running short of bandwidth and there is a danger of
  // back-pressure passing upstream of the DRAM FIFO. In this situation, we
  // suppress read requests to the DRAM FIFO so that more bandwidth is
  // available to writes.
//
  // Holding reads off can, however, let the RAM fill up until the input side
  // stalls. Once the input is seen to be stalled, reads are allowed again so
  // that the memory can fill up without deadlocking.
  //
  //---------------------------------------------------------------------------

  reg input_idle, input_idle_d1, input_stalled;

  always @(posedge clk) begin
    // Register the input FIFO free-space count before it is compared.
    space_input_reg <= space_input;

    // The input counts as stalled once the input state machine has been idle
    // for 2 or more clock cycles.
    input_idle_d1 <= input_idle;
    input_idle    <= (input_state == INPUT_IDLE);
    input_stalled <= input_idle_d1 && input_idle;

    // Reads are allowed by default. They are suppressed only while the input
    // FIFO free space is below the threshold and the input is not stalled.
    suppress_reads <= 1'b0;
    if (!input_stalled && space_input_reg < set_suppress_threshold)
      suppress_reads <= 1'b1;
  end


  //---------------------------------------------------------------------------
  // Output Handling and Buffer
  //---------------------------------------------------------------------------
  //
  // This block buffers output data and extracts the TLAST signal that was
  // embedded into the data stream.
  //
  //---------------------------------------------------------------------------

  // Large FIFO that buffers data read back from DRAM. It must be large enough
  // to accept a full burst read.
// Receives read data from the DMA engine (m_*_output) and reports its free
  // space on space_output, which the output state machine checks before
  // requesting a read burst.
  axi_fifo #(
    .WIDTH (MEM_DATA_W),
    .SIZE  (OUT_FIFO_SIZE)
  ) output_fifo (
    .clk      (clk),
    .reset    (rst),
    .clear    (set_clear),
    //
    .i_tdata  (m_tdata_output),
    .i_tvalid (m_tvalid_output),
    .i_tready (m_tready_output),
    //
    .o_tdata  (m_tdata_fifo),
    .o_tvalid (m_tvalid_fifo),
    .o_tready (m_tready_fifo),
    //
    .space    (space_output),
    .occupied ()
  );

  // Place flops right after FIFO to improve timing
  // NOTE(review): the link between the m_*_fifo signals above and the m_*_i0
  // signals below is not visible in this part of the file -- confirm.
  axi_fifo_flop2 #(
    .WIDTH (MEM_DATA_W)
  ) output_pipe_i0 (
    .clk      (clk),
    .reset    (rst),
    .clear    (set_clear),
    //
    .i_tdata  (m_tdata_i0),
    .i_tvalid (m_tvalid_i0),
    .i_tready (m_tready_i0),
    //
    .o_tdata  (m_tdata_i1),
    .o_tvalid (m_tvalid_i1),
    .o_tready (m_tready_i1),
    //
    .space    (),
    .occupied ()
  );

  // Pipeline flop before TLAST extraction logic
  axi_fifo_flop2 #(
    .WIDTH (MEM_DATA_W)
  ) output_pipe_i1 (
    .clk      (clk),
    .reset    (rst),
    .clear    (set_clear),
    //
    .i_tdata  (m_tdata_i1),
    .i_tvalid (m_tvalid_i1),
    .i_tready (m_tready_i1),
    //
    .o_tdata  (m_tdata_i2),
    .o_tvalid (m_tvalid_i2),
    .o_tready (m_tready_i2),
    //
    .space    (),
    .occupied ()
  );

  // Recovers the TLAST and TKEEP sideband signals from the data stream. Unlike
  // the FIFOs, which take rst and set_clear separately, this block is reset by
  // the OR of the two.
  axi_extract_tlast_tkeep #(
    .DATA_W (MEM_DATA_W),
    .KEEP_W (KEEP_W)
  ) axi_extract_tlast_tkeep_i (
    .clk      (clk),
    .rst      (rst | set_clear),
    //
    .i_tdata  (m_tdata_i2),
    .i_tvalid (m_tvalid_i2),
    .i_tready (m_tready_i2),
    //
    .o_tdata  (m_tdata_i3),
    .o_tkeep  (m_tkeep_i3),
    .o_tlast  (m_tlast_i3),
    .o_tvalid (m_tvalid_i3),
    .o_tready (m_tready_i3)
  );

  // Pipeline flop after TLAST extraction logic. TKEEP, TLAST and TDATA are
  // packed into one word for the flop and unpacked in the same order on its
  // output.
  axi_fifo_flop2 #(
    .WIDTH (MEM_DATA_W+1+KEEP_W)
  ) output_pipe_i3 (
    .clk      (clk),
    .reset    (rst),
    .clear    (set_clear),
    //
    .i_tdata  ({m_tkeep_i3, m_tlast_i3, m_tdata_i3}),
    .i_tvalid (m_tvalid_i3),
    .i_tready (m_tready_i3),
    //
    .o_tdata  ({m_tkeep, m_tlast, m_tdata}),
    .o_tvalid (m_tvalid),
    .o_tready (m_tready),
    //
    .space    (),
    .occupied ()
  );


  //-------------------------------------------------------------------------
  // Output
// (Memory Read) Logic
  //-------------------------------------------------------------------------
  //
  // The output state machine waits for enough entries in RAM to trigger a
  // read burst. A timeout can also trigger a burst so that smaller chunks of
  // data are not left to rot in the RAM. Also, if enough data is present in
  // the RAM to complete a burst up to the edge of a 4 KiB page boundary then
  // we do a burst up to the 4 KiB boundary.
  //
  //---------------------------------------------------------------------------

  //
  // Output side declarations
  //

  // Output state machine encodings
  localparam [2:0] OUTPUT_IDLE = 3'd0;
  localparam [2:0] OUTPUT1     = 3'd1;
  localparam [2:0] OUTPUT2     = 3'd2;
  localparam [2:0] OUTPUT3     = 3'd3;
  localparam [2:0] OUTPUT4     = 3'd4;
  localparam [2:0] OUTPUT5     = 3'd5;
  localparam [2:0] OUTPUT6     = 3'd6;

  // State and burst timeout
  reg [          2:0] output_state;
  reg [TIMEOUT_W-1:0] output_timeout_count;
  reg                 output_timeout_triggered;
  reg                 output_timeout_reset;

  // Read request handshake with the DMA engine
  wire                  read_ctrl_ready;
  reg                   read_ctrl_valid;
  reg  [MEM_ADDR_W-1:0] read_addr;
  reg  [           7:0] read_count          = 8'd0;
  reg  [           8:0] read_count_plus_one = 9'd1; // +1 copy kept to break critical timing paths
  reg                   update_read;

  // Registered page-boundary distance, kept in a flop to break critical
  // timing paths
  reg [WORD_ADDR_W-1:0] output_page_boundary;

  //
  // Output Packet Counter: cleared by rst only, incremented on every accepted
  // output transfer that carries TLAST.
  //
  always @(posedge clk)
    if (rst)
      out_pkt_count <= 0;
    else if (m_tlast & m_tvalid & m_tready)
      out_pkt_count <= out_pkt_count + 1;

  //
  // Output timeout counter. Timeout count only increments when there is some
  // data waiting to be read from the RAM.
//
  // The count and the triggered flag are cleared on reset/clear or when the
  // state machine pulses output_timeout_reset. The flag is set once the count
  // equals set_timeout. Otherwise the count advances only while the state
  // machine is in OUTPUT_IDLE and the RAM FIFO is not empty.
  always @(posedge clk) begin
    if (rst | set_clear) begin
      output_timeout_count <= 0;
      output_timeout_triggered <= 0;
    end else if (output_timeout_reset) begin
      output_timeout_count <= 0;
      output_timeout_triggered <= 0;
    end else if (output_timeout_count == set_timeout) begin
      output_timeout_triggered <= 1;
    end else if (output_state == OUTPUT_IDLE) begin
      output_timeout_count <= output_timeout_count + ((occupied != 0) ? 1 : 0);
    end
  end

  //
  // Output State Machine
  //
  // Sequence of one read burst:
  //   OUTPUT_IDLE -> OUTPUT1 (256-word trigger) or OUTPUT2 (timeout trigger)
  //               -> OUTPUT3 (only if the request was not accepted immediately)
  //               -> OUTPUT4 -> OUTPUT5 -> OUTPUT6 -> OUTPUT_IDLE
  //
  always @(posedge clk)
    if (rst | set_clear) begin
      output_state <= OUTPUT_IDLE;
      read_addr <= set_fifo_addr_base & ~set_fifo_addr_mask;
      output_timeout_reset <= 1'b0;
      read_ctrl_valid <= 1'b0;
      read_count <= 8'd0;
      read_count_plus_one <= 9'd1;
      update_read <= 1'b0;
    end else begin
      case (output_state)
        //
        // OUTPUT_IDLE.
        // To start an output transfer from DRAM need:
        // 1) Space in the output FIFO
        // and either
        // 2) 256 entries in the RAM
        // or
        // 3) Timeout occurred while waiting for more data, which can only happen
        //    if there's at least one word in the RAM.
        //
        OUTPUT_IDLE: begin
          read_ctrl_valid <= 1'b0;
          update_read <= 1'b0;
          output_timeout_reset <= 1'b0;
          if (space_output[15:8] != 'd0 && !suppress_reads) begin // (space_output > 255): 256 or more free slots in the output FIFO
            if (occupied[WORD_ADDR_W:8] != 'd0) begin // (occupied > 255): 256 or more words in RAM
              output_state <= OUTPUT1;
              output_timeout_reset <= 1'b1;

              // Calculate the number of entries remaining until next 4 KiB page
              // boundary is crossed, minus 1. The units of calculation are
              // words. The address is always word-aligned.
              output_page_boundary <= { read_addr[MEM_ADDR_W-1:12], {12-BYTE_ADDR_W{1'b1}} } -
                                      read_addr[MEM_ADDR_W-1 : BYTE_ADDR_W];
            end else if (output_timeout_triggered) begin // RAM FIFO timeout waiting for new data.
              output_state <= OUTPUT2;
              output_timeout_reset <= 1'b1;
              // Calculate the number of entries remaining until next 4 KiB page
              // boundary is crossed, minus 1. The units of calculation are
              // words. The address is always word-aligned.
              output_page_boundary <= { read_addr[MEM_ADDR_W-1:12], {12-BYTE_ADDR_W{1'b1}} } -
                                      read_addr[MEM_ADDR_W-1 : BYTE_ADDR_W];
            end
          end
        end
        //
        // OUTPUT1.
        // Caused by RAM FIFO reaching 256 entries.
        // Request read burst of lesser of:
        // 1) Entries until page boundary crossed
        // 2) 256
        //
        OUTPUT1: begin
          // Replicated logic to break a timing-critical path for read_count.
          // read_count holds the burst length minus 1 and read_count_plus_one
          // holds the burst length itself.
          read_count <= output_page_boundary[min(12, WORD_ADDR_W)-1:8] == 0 ?
                        output_page_boundary[7:0] :
                        255;
          read_count_plus_one <= output_page_boundary[min(12, WORD_ADDR_W)-1:8] == 0 ?
                                 output_page_boundary[7:0] + 1 :
                                 256;
          read_ctrl_valid <= 1'b1;
          if (read_ctrl_ready)
            output_state <= OUTPUT4; // Preemptive ACK
          else
            output_state <= OUTPUT3; // Wait for ACK
        end
        //
        // OUTPUT2.
        // Caused by timeout of main FIFO
        // Request read burst of lesser of:
        // 1) Entries until page boundary crossed
        // 2) Entries in main FIFO
        //
        OUTPUT2: begin
          // Replicated logic to break a timing-critical path for read_count.
          read_count <= output_page_boundary < occupied_minus_one ?
                        output_page_boundary[7:0] :
                        occupied_minus_one[7:0];
          // The fallback value must always equal read_count + 1. "occupied" was
          // below 256 in OUTPUT_IDLE, but a write update can land before this
          // state is evaluated and bring it to exactly 256. Taking
          // occupied[7:0] would then give 0 for a 256-word burst (read_count =
          // 255), so read_addr, occupied and space would not move and the same
          // data would be read again. Deriving the value from
          // occupied_minus_one keeps the two registers consistent; the
          // unsized "+ 1" widens the sum so that 255 + 1 = 256 fits the 9-bit
          // target.
          read_count_plus_one <= output_page_boundary < occupied_minus_one ?
                                 output_page_boundary[7:0] + 1 :
                                 occupied_minus_one[7:0] + 1;
          read_ctrl_valid <= 1'b1;
          if (read_ctrl_ready)
            output_state <= OUTPUT4; // Preemptive ACK
          else
            output_state <= OUTPUT3; // Wait for ACK
        end
        //
        // OUTPUT3.
        // Wait in this state for AXI4 DMA engine to accept transaction.
        //
        OUTPUT3: begin
          if (read_ctrl_ready) begin
            read_ctrl_valid <= 1'b0;
            output_state <= OUTPUT4; // ACK
          end else begin
            read_ctrl_valid <= 1'b1;
            output_state <= OUTPUT3; // Wait for ACK
          end
        end
        //
        // OUTPUT4.
        // Wait here until read_ctrl_ready deasserts. This is important as the
        // next time it asserts we know that a read response was received.
        OUTPUT4: begin
          read_ctrl_valid <= 1'b0;
          if (!read_ctrl_ready)
            output_state <= OUTPUT5; // Move on
          else
            output_state <= OUTPUT4; // Wait for deassert
        end
        //
        // OUTPUT5.
        // Transaction has been accepted by AXI4 DMA engine. Now we wait for the
        // re-assertion of read_ctrl_ready which signals that the AXI4 DMA engine
        // has received a last signal and good response for the whole read
        // transaction. We are now free to update read_addr pointer and go back
        // to idle state.
        //
        OUTPUT5: begin
          read_ctrl_valid <= 1'b0;
          if (read_ctrl_ready) begin
            // Advance by the burst length in bytes. Only the bits selected by
            // set_fifo_addr_mask take the new value (so the pointer wraps
            // inside the FIFO region); the remaining bits are preserved.
            read_addr <= ((read_addr + (read_count_plus_one << $clog2(MEM_DATA_W/8))) & set_fifo_addr_mask) | (read_addr & ~set_fifo_addr_mask);
            output_state <= OUTPUT6;
            update_read <= 1'b1; // One-cycle strobe; cleared in OUTPUT6
          end else begin
            output_state <= OUTPUT5;
          end
        end
        //
        // OUTPUT6.
        // Need to get occupied value updated before checking if there's more to do.
        //
        OUTPUT6: begin
          update_read <= 1'b0;
          output_state <= OUTPUT_IDLE;
        end

        default:
          output_state <= OUTPUT_IDLE;
      endcase // case(output_state)
    end


  //---------------------------------------------------------------------------
  // Shared Read/Write Logic
  //---------------------------------------------------------------------------

  //
  // Count number of words stored in the RAM FIFO. occupied_minus_one tracks
  // the same quantity offset by -1 so that the state machine does not need a
  // subtractor in its burst-length path.
  //
  always @(posedge clk) begin
    if (rst | set_clear) begin
      occupied <= 0;
      occupied_minus_one <= -1;
    end else begin
      occupied <= occupied + (update_write ? write_count_plus_one : 0) - (update_read ? read_count_plus_one : 0);
      occupied_minus_one <= occupied_minus_one + (update_write ? write_count_plus_one : 0) - (update_read ? read_count_plus_one : 0);
    end
  end

  //
  // Count amount of space in the RAM FIFO, in words.
  //
  always @(posedge clk) begin
    if (rst | set_clear) begin
      // Set to the FIFO size minus 64 words to make allowance for read/write
      // reordering in DRAM controller.
      // TODO: Is the 64-word extra needed? Why 64?
      space <= set_fifo_addr_mask[MEM_ADDR_W-1 -: WORD_ADDR_W] & ~('d63);
    end else begin
      space <= space - (update_write ? write_count_plus_one : 0) + (update_read ? read_count_plus_one : 0);
    end
  end


  //---------------------------------------------------------------------------
  // AXI4 DMA Master
  //---------------------------------------------------------------------------

  axi_dma_master #(
    .AWIDTH (MEM_ADDR_W),
    .DWIDTH (MEM_DATA_W)
  ) axi_dma_master_i (
    .aclk             (clk),
    .areset           (rst | set_clear),
    // Write Address
    .m_axi_awid       (m_axi_awid),
    .m_axi_awaddr     (m_axi_awaddr),
    .m_axi_awlen      (m_axi_awlen),
    .m_axi_awsize     (m_axi_awsize),
    .m_axi_awburst    (m_axi_awburst),
    .m_axi_awvalid    (m_axi_awvalid),
    .m_axi_awready    (m_axi_awready),
    .m_axi_awlock     (m_axi_awlock),
    .m_axi_awcache    (m_axi_awcache),
    .m_axi_awprot     (m_axi_awprot),
    .m_axi_awqos      (m_axi_awqos),
    .m_axi_awregion   (m_axi_awregion),
    .m_axi_awuser     (m_axi_awuser),
    // Write Data
    .m_axi_wdata      (m_axi_wdata),
    .m_axi_wstrb      (m_axi_wstrb),
    .m_axi_wlast      (m_axi_wlast),
    .m_axi_wvalid     (m_axi_wvalid),
    .m_axi_wready     (m_axi_wready),
    .m_axi_wuser      (m_axi_wuser),
    // Write Response
    .m_axi_bid        (m_axi_bid),
    .m_axi_bresp      (m_axi_bresp),
    .m_axi_bvalid     (m_axi_bvalid),
    .m_axi_bready     (m_axi_bready),
    .m_axi_buser      (m_axi_buser),
    // Read Address
    .m_axi_arid       (m_axi_arid),
    .m_axi_araddr     (m_axi_araddr),
    .m_axi_arlen      (m_axi_arlen),
    .m_axi_arsize     (m_axi_arsize),
    .m_axi_arburst    (m_axi_arburst),
    .m_axi_arvalid    (m_axi_arvalid),
    .m_axi_arready    (m_axi_arready),
    .m_axi_arlock     (m_axi_arlock),
    .m_axi_arcache    (m_axi_arcache),
    .m_axi_arprot     (m_axi_arprot),
    .m_axi_arqos      (m_axi_arqos),
    .m_axi_arregion   (m_axi_arregion),
    .m_axi_aruser     (m_axi_aruser),
    // Read Data
    .m_axi_rid        (m_axi_rid),
    .m_axi_rdata      (m_axi_rdata),
    .m_axi_rresp      (m_axi_rresp),
    .m_axi_rlast      (m_axi_rlast),
    .m_axi_rvalid     (m_axi_rvalid),
    .m_axi_rready     (m_axi_rready),
    .m_axi_ruser      (m_axi_ruser),
    //
    // DMA interface for Write transactions
    //
    .write_addr       (write_addr),       // Byte address for start of write transaction (should be word aligned)
    .write_count      (write_count),      // Number of words in the write burst, minus 1
    .write_ctrl_valid (write_ctrl_valid),
    .write_ctrl_ready (write_ctrl_ready),
    .write_data       (s_tdata_input),
    .write_data_valid (s_tvalid_input),
    .write_data_ready (s_tready_input),
    //
    // DMA interface for Read transactions
    //
    .read_addr        (read_addr),        // Byte address for start of read transaction (should be word aligned)
    .read_count       (read_count),       // Number of words in the read burst, minus 1
    .read_ctrl_valid  (read_ctrl_valid),
    .read_ctrl_ready  (read_ctrl_ready),
    .read_data        (m_tdata_output),
    .read_data_valid  (m_tvalid_output),
    .read_data_ready  (m_tready_output),
    //
    // Debug
    //
    .debug            ()
  );

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist.v new file mode 100644 index 000000000..2dd3f99d3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist.v @@ -0,0 +1,294 @@
//
// Copyright 2019 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axi_ram_fifo_bist
//
// Description:
//
//   Implements a built-in self test for the RAM FIFO. It can generate random
//   or sequential data that it outputs as quickly as possible.
//   The output of the RAM is verified to make sure that it matches what was
//   input to the RAM.
//
// Parameters:
//
//   DATA_W   : The width of the data port to use for the AXI4-Stream interface
//
//   COUNT_W  : Width of internal counters. This must be wide enough so that
//              word, cycle, and error counters don't overflow during a
//              test.
//
//   CLK_RATE : The frequency of clk in Hz
//
//   RAND     : Set to 1 for random data, 0 for sequential data.
//

module axi_ram_fifo_bist #(
  parameter DATA_W   = 64,
  parameter COUNT_W  = 48,
  parameter CLK_RATE = 200e6,
  parameter RAND     = 1
) (
  input clk,
  input rst,

  //--------------------------------------------------------------------------
  // CTRL Port
  //--------------------------------------------------------------------------

  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  output wire        s_ctrlport_resp_ack,
  output wire [31:0] s_ctrlport_resp_data,

  //--------------------------------------------------------------------------
  // AXI-Stream Interface
  //--------------------------------------------------------------------------

  // Output to RAM FIFO
  output wire [DATA_W-1:0] m_tdata,
  output reg               m_tvalid,
  input  wire              m_tready,

  // Input from RAM FIFO
  input  wire [DATA_W-1:0] s_tdata,
  input  wire              s_tvalid,
  output wire              s_tready,

  //---------------------------------------------------------------------------
  // Status
  //---------------------------------------------------------------------------

  output reg running

);

  //---------------------------------------------------------------------------
  // Local Parameters
  //---------------------------------------------------------------------------

  // Internal word size to use for data generation. The output word will be a
  // multiple of this size.
  localparam WORD_W = 32;

  // Random number seed (must not be 0)
  localparam [WORD_W-1:0] SEED = 'h012345678;

  // Test data reset value
  localparam [WORD_W-1:0] INIT = RAND ? SEED : 0;


  //---------------------------------------------------------------------------
  // Assertions
  //---------------------------------------------------------------------------

  // Each check instantiates an undefined module named after the violated
  // rule, so a bad parameter stops elaboration with a readable message.
  if (DATA_W % WORD_W != 0) begin
    DATA_W_must_be_a_multiple_of_WORD_W();
  end

  // LFSR only supports 8, 16, and 32 bits
  if (WORD_W != 32 && WORD_W != 16 && WORD_W != 8) begin
    WORD_W_not_supported();
  end

  //---------------------------------------------------------------------------
  // Functions
  //---------------------------------------------------------------------------

  // Linear-feedback Shift Register for random number generation. Shifts din
  // left by one bit and inserts the XOR of the tap bits at bit 0.
  function [WORD_W-1:0] lfsr(input [WORD_W-1:0] din);
    reg new_bit;
    begin
      case (WORD_W)
        8       : new_bit = din[7]  ^ din[5]  ^ din[4]  ^ din[3];
        16      : new_bit = din[15] ^ din[14] ^ din[12] ^ din[3];
        32      : new_bit = din[31] ^ din[21] ^ din[1]  ^ din[0];
        // Unsupported widths are rejected by the assertion above; the default
        // only keeps new_bit assigned on every path.
        default : new_bit = 1'b0;
      endcase
      lfsr = { din[WORD_W-2:0], new_bit };
    end
  endfunction

  // Next test word: LFSR step in random mode, increment in sequential mode.
  function [WORD_W-1:0] next(input [WORD_W-1:0] din);
    next = RAND ? lfsr(din) : din + 1;
  endfunction


  //---------------------------------------------------------------------------
  // Signal Declarations
  //---------------------------------------------------------------------------

  reg [COUNT_W-1:0] tx_count;    // Number of words transmitted to FIFO
  reg [COUNT_W-1:0] rx_count;    // Number of words received back from FIFO
  reg [COUNT_W-1:0] error_count; // Number of words that show errors

  reg [WORD_W-1:0] tx_data = next(INIT); // Transmitted data word
  reg [DATA_W-1:0] rx_data = INIT;       // Received data words
  reg [WORD_W-1:0] exp_data;             // Expected data word
  reg              rx_valid;             // Received word is valid (strobe)

  wire [COUNT_W-1:0] num_words;   // Number of words to test
  reg  [COUNT_W-1:0] cycle_count; // Number of clock cycles test has been running for
  wire               start;       // Start test
  wire               stop;        // Stop test
  wire               clear;       // Clear the counters
  wire               continuous;  // Continuous test mode


  //---------------------------------------------------------------------------
  // Registers
  //---------------------------------------------------------------------------

  axi_ram_fifo_bist_regs #(
    .DATA_W   (DATA_W),
    .COUNT_W  (COUNT_W),
    .CLK_RATE (CLK_RATE)
  ) axi_ram_fifo_bist_regs_i (
    .clk                  (clk),
    .rst                  (rst),
    .s_ctrlport_req_wr    (s_ctrlport_req_wr),
    .s_ctrlport_req_rd    (s_ctrlport_req_rd),
    .s_ctrlport_req_addr  (s_ctrlport_req_addr),
    .s_ctrlport_req_data  (s_ctrlport_req_data),
    .s_ctrlport_resp_ack  (s_ctrlport_resp_ack),
    .s_ctrlport_resp_data (s_ctrlport_resp_data),
    .tx_count             (tx_count),
    .rx_count             (rx_count),
    .error_count          (error_count),
    .cycle_count          (cycle_count),
    .num_words            (num_words),
    .start                (start),
    .stop                 (stop),
    .clear                (clear),
    .continuous           (continuous),
    .running              (running)
  );


  //---------------------------------------------------------------------------
  // State Machine
  //---------------------------------------------------------------------------

  localparam ST_IDLE      = 0;
  localparam ST_ACTIVE    = 1;
  localparam ST_WAIT_DONE = 2;

  reg [        1:0] state;
  reg [COUNT_W-1:0] num_words_m1;

  always @(posedge clk) begin
    if (rst) begin
      state    <= ST_IDLE;
      m_tvalid <= 0;
      running  <= 0;
    end else begin
      // m_tvalid is low unless ST_ACTIVE raises it below
      m_tvalid <= 0;

      case (state)
        // Latch the requested length (minus 1) and wait for a start strobe
        ST_IDLE : begin
          num_words_m1 <= num_words-1;
          if (start) begin
            running <= 1;
            state   <= ST_ACTIVE;
          end
        end

        // Keep m_tvalid asserted until a stop is requested or, when not in
        // continuous mode, the last requested word is being transferred.
        ST_ACTIVE : begin
          if (stop || (tx_count == num_words_m1 && m_tvalid && m_tready && !continuous)) begin
            m_tvalid <= 0;
            state    <= ST_WAIT_DONE;
          end else begin
            m_tvalid <= 1;
            running  <= 1;
          end
        end

        // Wait until every transmitted word has been received back
        ST_WAIT_DONE : begin
          if (rx_count >= tx_count) begin
            running <= 0;
            state   <= ST_IDLE;
          end
        end

        // The 2-bit state register has one unused encoding (2'b11). Recover
        // to idle instead of hanging with "running" stuck.
        default : begin
          running <= 0;
          state   <= ST_IDLE;
        end
      endcase
    end
  end


  //---------------------------------------------------------------------------
  // Data Generator
  //---------------------------------------------------------------------------

  reg count_en;

  // Output data is the concatenation of our generated test word.
  assign m_tdata = {(DATA_W/WORD_W){ tx_data }};

  // We were born ready
  assign s_tready = 1;

  always @(posedge clk) begin
    if (rst) begin
      tx_data     <= next(INIT);
      exp_data    <= INIT;
      rx_valid    <= 0;
      tx_count    <= 0;
      rx_count    <= 0;
      error_count <= 0;
      cycle_count <= 0;
      count_en    <= 0;
    end else begin
      //
      // Output Data generation
      //
      if (m_tvalid && m_tready) begin
        tx_data  <= next(tx_data);
        tx_count <= tx_count + 1;
      end

      //
      // Expected Data Generation
      //
      // exp_data is advanced in the same cycle that the word is captured, so
      // one cycle later (when rx_valid is high) it holds the value expected
      // for the captured rx_data.
      if (s_tvalid & s_tready) begin
        rx_valid <= 1;
        exp_data <= next(exp_data);
        rx_count <= rx_count + 1;
        rx_data  <= s_tdata;
      end else begin
        rx_valid <= 0;
      end

      //
      // Data checker
      //
      if (rx_valid) begin
        if (rx_data !== {(DATA_W/WORD_W){exp_data}}) begin
          error_count <= error_count + 1;
        end
      end

      //
      // Cycle Counter
      //
      // Start counting after we get the first word back so that we measure
      // throughput and not latency.
      if (state == ST_IDLE) count_en <= 0;
      else if (s_tvalid) count_en <= 1;

      if (count_en) cycle_count <= cycle_count + 1;

      //
      // Clear counters upon request. These assignments come last so that
      // they override any increment made above in the same cycle.
      //
      if (clear) begin
        tx_count    <= 0;
        rx_count    <= 0;
        error_count <= 0;
        cycle_count <= 0;
      end
    end
  end

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist_regs.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist_regs.v new file mode 100644 index 000000000..c161c10f5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_bist_regs.v @@ -0,0 +1,206 @@
//
// Copyright 2019 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axi_ram_fifo_bist_regs
//
// Description:
//
//   Implements the registers for the RAM FIFO BIST logic.
//
// Parameters:
//
//   DATA_W   : The width of the data port to use for the AXI4-Stream
//              interface.
//
//   COUNT_W  : Width of internal counters. This must be wide enough so that
//              word, cycle, and error counters don't overflow during a
//              test.
//
//   CLK_RATE : The frequency of clk in Hz
//

module axi_ram_fifo_bist_regs #(
  parameter DATA_W   = 64,
  parameter COUNT_W  = 48,
  parameter CLK_RATE = 200e6
) (
  input clk,
  input rst,

  //--------------------------------------------------------------------------
  // CTRL Port
  //--------------------------------------------------------------------------

  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  output reg         s_ctrlport_resp_ack,
  output reg  [31:0] s_ctrlport_resp_data,

  //--------------------------------------------------------------------------
  // Control and Status
  //--------------------------------------------------------------------------

  input wire [COUNT_W-1:0] tx_count,
  input wire [COUNT_W-1:0] rx_count,
  input wire [COUNT_W-1:0] error_count,
  input wire [COUNT_W-1:0] cycle_count,

  output wire [COUNT_W-1:0] num_words,

  output reg  start,
  output reg  stop,
  output reg  clear,
  output reg  continuous,
  input  wire running
);

  `include "axi_ram_fifo_regs.vh"

  localparam BYTES_PER_WORD = DATA_W/8;
  localparam WORD_SHIFT     = $clog2(BYTES_PER_WORD);

  // Make sure DATA_W is a power of 2, or else the word/byte count conversion
  // logic won't be correct.
  if (2**$clog2(DATA_W) != DATA_W) begin
    DATA_W_must_be_a_power_of_2();
  end

  // The register logic currently assumes that COUNT_W is at least 33 bits.
  if (COUNT_W <= 32) begin
    COUNT_W_must_be_larger_than_32();
  end

  wire [19:0] word_addr;
  wire [63:0] tx_byte_count;
  wire [63:0] rx_byte_count;
  reg  [63:0] num_bytes = 0;

  // Upper halves of the 64-bit counters. Each is latched when the matching
  // *_LO register is read and returned by the following *_HI read, so the two
  // halves come from the same instant.
  reg [31:0] tx_byte_count_hi = 0;
  reg [31:0] rx_byte_count_hi = 0;
  reg [31:0] error_count_hi   = 0;
  reg [31:0] cycle_count_hi   = 0;

  // Only use the word address to simplify address decoding logic
  assign word_addr = {s_ctrlport_req_addr[19:2], 2'b00 };

  // Convert between words and bytes
  assign tx_byte_count = tx_count << WORD_SHIFT;
  assign rx_byte_count = rx_count << WORD_SHIFT;
  assign num_words     = num_bytes >> WORD_SHIFT;


  always @(posedge clk) begin
    if (rst) begin
      s_ctrlport_resp_ack <= 0;
      start               <= 0;
      stop                <= 0;
      continuous          <= 0;
      clear               <= 0;
      num_bytes           <= 0;
    end else begin
      // Default values: ack, start, stop and clear are one-cycle strobes
      s_ctrlport_resp_ack <= 0;
      start               <= 0;
      stop                <= 0;
      clear               <= 0;

      //-----------------------------------------------------------------------
      // Read Logic
      //-----------------------------------------------------------------------

      if (s_ctrlport_req_rd) begin
        case (word_addr)
          REG_BIST_CTRL : begin
            s_ctrlport_resp_data                       <= 0;
            s_ctrlport_resp_data[REG_BIST_RUNNING_POS] <= running;
            s_ctrlport_resp_data[REG_BIST_CONT_POS]    <= continuous;
            s_ctrlport_resp_ack                        <= 1;
          end
          REG_BIST_CLK_RATE : begin
            s_ctrlport_resp_data <= CLK_RATE;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_NUM_BYTES_LO : begin
            s_ctrlport_resp_data <= num_bytes[31:0];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_NUM_BYTES_HI : begin
            s_ctrlport_resp_data <= num_bytes[63:32];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_TX_BYTE_COUNT_LO : begin
            s_ctrlport_resp_data <= tx_byte_count[31:0];
            tx_byte_count_hi     <= tx_byte_count[63:32];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_TX_BYTE_COUNT_HI : begin
            s_ctrlport_resp_data <= tx_byte_count_hi;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_RX_BYTE_COUNT_LO : begin
            s_ctrlport_resp_data <= rx_byte_count[31:0];
            // Latch the full upper word, as the TX path does. The byte count
            // is the word count shifted left by WORD_SHIFT, so it can occupy
            // bits above COUNT_W-1; taking only [COUNT_W-1:32] dropped them.
            rx_byte_count_hi     <= rx_byte_count[63:32];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_RX_BYTE_COUNT_HI : begin
            s_ctrlport_resp_data <= rx_byte_count_hi;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_ERROR_COUNT_LO : begin
            s_ctrlport_resp_data         <= error_count[31:0];
            error_count_hi[COUNT_W-33:0] <= error_count[COUNT_W-1:32];
            s_ctrlport_resp_ack          <= 1;
          end
          REG_BIST_ERROR_COUNT_HI : begin
            // All 32 bits are driven here, so no separate zeroing is needed
            s_ctrlport_resp_data <= error_count_hi;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_BIST_CYCLE_COUNT_LO : begin
            s_ctrlport_resp_data         <= cycle_count[31:0];
            cycle_count_hi[COUNT_W-33:0] <= cycle_count[COUNT_W-1:32];
            s_ctrlport_resp_ack          <= 1;
          end
          REG_BIST_CYCLE_COUNT_HI : begin
            // All 32 bits are driven here, so no separate zeroing is needed
            s_ctrlport_resp_data <= cycle_count_hi;
            s_ctrlport_resp_ack  <= 1;
          end
        endcase
      end


      //-----------------------------------------------------------------------
      // Write Logic
      //-----------------------------------------------------------------------

      if (s_ctrlport_req_wr) begin
        case (word_addr)
          REG_BIST_CTRL : begin
            start               <= s_ctrlport_req_data[REG_BIST_START_POS];
            stop                <= s_ctrlport_req_data[REG_BIST_STOP_POS];
            clear               <= s_ctrlport_req_data[REG_BIST_CLEAR_POS];
            continuous          <= s_ctrlport_req_data[REG_BIST_CONT_POS];
            s_ctrlport_resp_ack <= 1;
          end
          REG_BIST_NUM_BYTES_LO : begin
            // Update only the word-count portion; the byte-offset bits below
            // WORD_SHIFT stay at 0.
            num_bytes[31:WORD_SHIFT] <= s_ctrlport_req_data[31:WORD_SHIFT];
            s_ctrlport_resp_ack      <= 1;
          end
          REG_BIST_NUM_BYTES_HI : begin
            num_bytes[COUNT_W-1:32] <= s_ctrlport_req_data[COUNT_W-33:0];
            s_ctrlport_resp_ack     <= 1;
          end
        endcase
      end

    end
  end

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.v new file mode 100644 index 000000000..4496d172d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.v @@
//
// Copyright 2019 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axi_ram_fifo_regs
//
// Description:
//
//   Implements the software-accessible registers for the axi_ram_fifo block.
//
//   Requests arrive on a CTRL Port slave interface. A read or write whose
//   word address matches one of the registers decoded below is acknowledged
//   on the clock cycle after the request. Requests to any other address are
//   not acknowledged by this module.
//
//   The 64-bit FIFO fullness is read as two 32-bit words. Reading the low
//   word captures the high word, so the low word must be read first in order
//   to obtain a coherent 64-bit value.
//
// Parameters:
//
//   MEM_ADDR_W         : Width of the memory byte address, in bits. Reported
//                        by REG_FIFO_MEM_SIZE and used to size the address
//                        base and mask registers.
//   MEM_DATA_W         : Width of the memory data word, in bits. Only
//                        reported by REG_FIFO_MEM_SIZE.
//   FIFO_ADDR_BASE     : Reset value of set_fifo_addr_base.
//   FIFO_ADDR_MASK     : Reset value of set_fifo_addr_mask.
//   FIFO_ADDR_MASK_MIN : Bits that are always forced to 1 when the low word
//                        of the address mask is written.
//   BIST               : Non-zero sets the "BIST present" bit of
//                        REG_FIFO_INFO.
//   IN_FIFO_SIZE       : Value reported in the IN_FIFO_SIZE field of
//                        REG_FIFO_READ_SUPPRESS.
//   WORD_ADDR_W        : rb_occupied is WORD_ADDR_W+1 bits wide and counts
//                        memory words of 2**(MEM_ADDR_W-WORD_ADDR_W) bytes.
//   BURST_TIMEOUT      : Reset value of set_timeout.
//   TIMEOUT_W          : Width of set_timeout, in bits.
//


module axi_ram_fifo_regs #(
  parameter                  MEM_ADDR_W         = 32,
  parameter                  MEM_DATA_W         = 64,
  parameter [MEM_ADDR_W-1:0] FIFO_ADDR_BASE     = 'h0,
  parameter [MEM_ADDR_W-1:0] FIFO_ADDR_MASK     = 'h0000FFFF,
  parameter [MEM_ADDR_W-1:0] FIFO_ADDR_MASK_MIN = 'h00000FFF,
  parameter                  BIST               = 1,
  parameter                  IN_FIFO_SIZE       = 10,
  parameter                  WORD_ADDR_W        = 29,
  parameter                  BURST_TIMEOUT      = 128,
  parameter                  TIMEOUT_W          = 12
) (

  input wire clk,
  input wire rst,

  //--------------------------------------------------------------------------
  // CTRL Port
  //--------------------------------------------------------------------------

  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  output reg         s_ctrlport_resp_ack,
  output reg  [31:0] s_ctrlport_resp_data,

  //---------------------------------------------------------------------------
  // Register Inputs and Outputs
  //---------------------------------------------------------------------------

  // Read-back Registers
  input wire [         31:0] rb_out_pkt_count,
  input wire [WORD_ADDR_W:0] rb_occupied,

  // Settings Registers
  output reg [           15:0] set_suppress_threshold,
  output reg [  TIMEOUT_W-1:0] set_timeout,
  output reg [MEM_ADDR_W-1:0] set_fifo_addr_base = FIFO_ADDR_BASE,
  output reg [MEM_ADDR_W-1:0] set_fifo_addr_mask = FIFO_ADDR_MASK
);

  `include "axi_ram_fifo_regs.vh"

  // Constant helper functions used to compute part-select bounds.
  function automatic integer min(input integer a, b);
    min = a < b ? a : b;
  endfunction

  function automatic integer max(input integer a, b);
    max = a > b ? a : b;
  endfunction

  wire [19:0] word_addr;
  wire [63:0] reg_fifo_fullness;
  reg  [31:0] reg_fifo_fullness_hi;

  // Only use the word address to simplify address decoding logic
  assign word_addr = {s_ctrlport_req_addr[19:2], 2'b00 };

  // Convert the "occupied" word count to a 64-bit byte value. The count is
  // first zero-extended to exactly 64 bits and then shifted left by the
  // number of byte-offset bits per memory word, so the expression width
  // always matches the 64-bit destination.
  assign reg_fifo_fullness =
    {{(63-WORD_ADDR_W){1'b0}}, rb_occupied} << (MEM_ADDR_W-WORD_ADDR_W);

  always @(posedge clk) begin
    if (rst) begin
      s_ctrlport_resp_ack    <= 0;
      set_suppress_threshold <= 0;
      set_timeout            <= BURST_TIMEOUT;
      set_fifo_addr_base     <= FIFO_ADDR_BASE;
      set_fifo_addr_mask     <= FIFO_ADDR_MASK;
    end else begin
      // The acknowledge is a single-cycle strobe; default it low.
      s_ctrlport_resp_ack <= 0;

      //-----------------------------------------------------------------------
      // Write Logic
      //-----------------------------------------------------------------------

      if (s_ctrlport_req_wr) begin
        case (word_addr)
          REG_FIFO_READ_SUPPRESS : begin
            set_suppress_threshold <= s_ctrlport_req_data[REG_FIFO_SUPPRESS_THRESH_POS +: REG_FIFO_SUPPRESS_THRESH_W];
            s_ctrlport_resp_ack    <= 1;
          end
          REG_FIFO_TIMEOUT : begin
            // Limit the field to the width of set_timeout so the part-select
            // stays in range when TIMEOUT_W is smaller than REG_TIMEOUT_W.
            set_timeout[min(REG_TIMEOUT_W, TIMEOUT_W)-1:0] <=
              s_ctrlport_req_data[min(REG_TIMEOUT_W, TIMEOUT_W)-1:0];
            s_ctrlport_resp_ack <= 1;
          end
          REG_FIFO_ADDR_BASE_LO : begin
            set_fifo_addr_base[min(32, MEM_ADDR_W)-1:0] <= s_ctrlport_req_data[min(32, MEM_ADDR_W)-1:0];
            s_ctrlport_resp_ack <= 1;
          end
          REG_FIFO_ADDR_BASE_HI : begin
            // The upper word only exists when the address is wider than 32 bits
            if (MEM_ADDR_W > 32) begin
              set_fifo_addr_base[max(32, MEM_ADDR_W-1):32] <= s_ctrlport_req_data[max(0, MEM_ADDR_W-33):0];
            end
            s_ctrlport_resp_ack <= 1;
          end
          REG_FIFO_ADDR_MASK_LO : begin
            // Coerce the lower bits so we are guaranteed to meet the minimum mask size requirement.
            set_fifo_addr_mask[min(32, MEM_ADDR_W)-1:0] <=
              s_ctrlport_req_data[min(32, MEM_ADDR_W)-1:0] | FIFO_ADDR_MASK_MIN;
            s_ctrlport_resp_ack <= 1;
          end
          REG_FIFO_ADDR_MASK_HI : begin
            // The upper word only exists when the address is wider than 32 bits
            if (MEM_ADDR_W > 32) begin
              set_fifo_addr_mask[max(32, MEM_ADDR_W-1):32] <= s_ctrlport_req_data[max(0, MEM_ADDR_W-33):0];
            end
            s_ctrlport_resp_ack <= 1;
          end
        endcase
      end


      //-----------------------------------------------------------------------
      // Read Logic
      //-----------------------------------------------------------------------

      if (s_ctrlport_req_rd) begin
        case (word_addr)
          REG_FIFO_INFO : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[REG_FIFO_MAGIC_POS +: REG_FIFO_MAGIC_W] <= 16'hF1F0;
            s_ctrlport_resp_data[REG_FIFO_BIST_PRSNT_POS] <= (BIST != 0);
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_READ_SUPPRESS : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[REG_FIFO_IN_FIFO_SIZE_POS +: REG_FIFO_IN_FIFO_SIZE_W]
              <= IN_FIFO_SIZE;
            s_ctrlport_resp_data[REG_FIFO_SUPPRESS_THRESH_POS +: REG_FIFO_SUPPRESS_THRESH_W]
              <= set_suppress_threshold;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_MEM_SIZE : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[REG_FIFO_DATA_SIZE_POS +: REG_FIFO_DATA_SIZE_W]
              <= MEM_DATA_W;
            s_ctrlport_resp_data[REG_FIFO_ADDR_SIZE_POS +: REG_FIFO_ADDR_SIZE_W]
              <= MEM_ADDR_W;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_TIMEOUT : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[min(REG_TIMEOUT_W, TIMEOUT_W)-1:0] <=
              set_timeout[min(REG_TIMEOUT_W, TIMEOUT_W)-1:0];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_FULLNESS_LO : begin
            // Capture the upper word at the same time as the lower word is
            // returned so that the two halves belong to the same sample.
            s_ctrlport_resp_data <= reg_fifo_fullness[31:0];
            reg_fifo_fullness_hi <= reg_fifo_fullness[63:32];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_FULLNESS_HI : begin
            s_ctrlport_resp_data <= reg_fifo_fullness_hi;
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_ADDR_BASE_LO : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[min(32, MEM_ADDR_W)-1:0] <= set_fifo_addr_base[min(32, MEM_ADDR_W)-1:0];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_ADDR_BASE_HI : begin
            s_ctrlport_resp_data <= 0;
            if (MEM_ADDR_W > 32) begin
              s_ctrlport_resp_data[max(0,MEM_ADDR_W-33):0] <= set_fifo_addr_base[max(32, MEM_ADDR_W-1):32];
            end
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_ADDR_MASK_LO : begin
            s_ctrlport_resp_data <= 0;
            s_ctrlport_resp_data[min(32, MEM_ADDR_W)-1:0] <= set_fifo_addr_mask[min(32, MEM_ADDR_W)-1:0];
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_ADDR_MASK_HI : begin
            s_ctrlport_resp_data <= 0;
            if (MEM_ADDR_W > 32) begin
              s_ctrlport_resp_data[max(0, MEM_ADDR_W-33):0] <= set_fifo_addr_mask[max(32, MEM_ADDR_W-1):32];
            end
            s_ctrlport_resp_ack  <= 1;
          end
          REG_FIFO_PACKET_CNT : begin
            // The packet count fills the whole 32-bit word
            s_ctrlport_resp_data <= rb_out_pkt_count;
            s_ctrlport_resp_ack  <= 1;
          end
        endcase
      end

    end
  end

endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.vh b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.vh new file mode 100644 index 000000000..ccb942552 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/axi_ram_fifo_regs.vh @@ -0,0 +1,228 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axi_ram_fifo_regs (Header) +// +// Description: Header file for axi_ram_fifo_regs. All registers are 32-bit +// words from software's perspective. +// + +// Address space size, per FIFO. That is, each FIFO is separated in the CTRL +// Port address space by 2^RAM_FIFO_ADDR_W bytes. +localparam RAM_FIFO_ADDR_W = 7; + + +// REG_FIFO_INFO (R) +// +// Contains info/control bits for the FIFO. +// +// [31:16] : Returns the magic number 0xF1F0 (read-only) +// [0] : Indicates if BIST logic is present (read-only) +// +localparam REG_FIFO_INFO = 'h0; +// +localparam REG_FIFO_MAGIC_POS = 16; +localparam REG_FIFO_BIST_PRSNT_POS = 0; +// +localparam REG_FIFO_MAGIC_W = 16; + + +// REG_FIFO_READ_SUPPRESS (R|W) +// +// Controls the read suppression threshold. RAM reads will be disabled whenever +// the amount of free space in the input buffer (in units of RAM words) falls +// below this threshold. This is intended to prevent input buffer overflows +// caused by the RAM being too busy with reads. To disable the read suppression +// feature, set the threshold to 0. In general, the threshold should be set to +// a small value relative to the input FIFO buffer size (the IN_FIFO_SIZE +// field) so that it is only enabled when the input FIFO buffer is close to +// overflowing. +// + +// [31:16] : Address width of input buffer. In other words, the input buffer is +// 2**REG_FIFO_IN_FIFO_SIZE RAM words deep. 
(read-only) +// [15: 0] : Read suppression threshold, in RAM words (read/write) +// +localparam REG_FIFO_READ_SUPPRESS = 'h4; +// +localparam REG_FIFO_IN_FIFO_SIZE_POS = 16; +localparam REG_FIFO_SUPPRESS_THRESH_POS = 0; +// +localparam REG_FIFO_IN_FIFO_SIZE_W = 16; +localparam REG_FIFO_SUPPRESS_THRESH_W = 16; + + +// REG_FIFO_MEM_SIZE (R) +// +// Returns information about the size of the attached memory. The address size +// allows software to determine what mask and base address values are valid. +// +// [31:16] : Returns the bit width of the RAM word size. +// [15: 0] : Returns the bit width of the RAM byte address size. That is, the +// addressable portion of the attached memory is +// 2**REG_FIFO_ADDR_SIZE bytes. +// +localparam REG_FIFO_MEM_SIZE = 'h8; +// +localparam REG_FIFO_DATA_SIZE_POS = 16; +localparam REG_FIFO_ADDR_SIZE_POS = 0; +// +localparam REG_FIFO_DATA_SIZE_W = 16; +localparam REG_FIFO_ADDR_SIZE_W = 16; + + +// REG_FIFO_TIMEOUT (R|W) +// +// Programs the FIFO timeout, in memory interface clock cycles. For efficiency, +// we want the memory to read and write full bursts. But we also don't want +// smaller amounts of data to be stuck in the FIFO. This timeout determines how +// long we wait for new data before we go ahead and perform a smaller +// read/write. A longer timeout will make more efficient use of the memory, but +// will increase latency. The default value is set by a module parameter. +// +// [31:12] : <Reserved> +// [11: 0] : Timeout +// +localparam REG_FIFO_TIMEOUT = 'hC; +// +localparam REG_TIMEOUT_POS = 0; +localparam REG_TIMEOUT_W = 12; + + +// REG_FIFO_FULLNESS (R) +// +// Returns the fullness of the FIFO in bytes. This is a 64-bit register in +// which the least-significant 32-bit word must be read first. +// +localparam REG_FIFO_FULLNESS_LO = 'h10; +localparam REG_FIFO_FULLNESS_HI = 'h14; + + +// REG_FIFO_ADDR_BASE (R|W) +// +// Sets the base byte address to use for this FIFO. 
This should only be updated +// when the FIFO is idle. This should be set to a multiple of +// REG_FIFO_ADDR_MASK+1. Depending on the size of the memory connected, upper +// bits might be ignored. +// +localparam REG_FIFO_ADDR_BASE_LO = 'h18; +localparam REG_FIFO_ADDR_BASE_HI = 'h1C; + + +// REG_FIFO_ADDR_MASK (R|W) +// +// The byte address mask that controls the portion of the memory address that +// is allocated to this FIFO. For example, set to 0xFFFF for a 64 KiB memory. +// +// This should only be updated when the FIFO is idle. It must be equal to a +// power-of-2 minus 1. It should be no smaller than FIFO_ADDR_MASK_MIN, defined +// in axi_ram_fifo.v, otherwise it will be coerced up to that size. +// +// This is a 64-bit register in which the least-significant 32-bit word must +// be read/written first. Depending on the size of the memory connected, the +// upper bits might be ignored. +// +localparam REG_FIFO_ADDR_MASK_LO = 'h20; +localparam REG_FIFO_ADDR_MASK_HI = 'h24; + + +// REG_FIFO_PACKET_CNT (R) +// +// Returns the number of packets transferred out of the FIFO block. +// +localparam REG_FIFO_PACKET_CNT = 'h28; + + +//----------------------------------------------------------------------------- +// BIST Registers +//----------------------------------------------------------------------------- +// +// Only read these registers if the BIST component is included. +// +//----------------------------------------------------------------------------- + +// REG_BIST_CTRL (R|W) +// +// Control register for the BIST component. +// +// [4] : BIST is running. Changes to 1 after a test is started, then returns to +// 0 when BIST is complete. +// +// [3] : Continuous mode (run until stopped). When set to 1, test will continue +// to run until Stop bit is set. +// +// [2] : Clear the BIST counters (i.e., the TX, RX, cycle, and error counters) +// +// [1] : Stop BIST (strobe). 
Write a 1 to this bit to stop the test that is +// currently running +// +// [0] : Start BIST (strobe). Write a 1 to this bit to start a test using the +// configured NUM_BYTES and continuous mode setting. +// +localparam REG_BIST_CTRL = 'h30; +// +localparam REG_BIST_RUNNING_POS = 4; +localparam REG_BIST_CONT_POS = 3; +localparam REG_BIST_CLEAR_POS = 2; // Strobe +localparam REG_BIST_STOP_POS = 1; // Strobe +localparam REG_BIST_START_POS = 0; // Strobe + + +// REG_BIST_CLK_RATE (R) +// +// Reports the clock rate of the BIST component in Hz. This can be used with +// REG_BIST_CYCLE_COUNT to calculate throughput. +// +localparam REG_BIST_CLK_RATE = 'h34; + + +// REG_BIST_NUM_BYTES (R|W) +// +// Number of bytes to generate for the next BIST run. This is not used if the +// REG_BIST_CONT_POS bit is set. This register should not be updated while the +// BIST is running. +// +localparam REG_BIST_NUM_BYTES_LO = 'h38; +localparam REG_BIST_NUM_BYTES_HI = 'h3C; + + +// REG_BIST_TX_BYTE_COUNT (R) +// +// Reports the number of bytes transmitted by the BIST component. This should +// always be read least-significant word first to ensure coherency. Once BIST +// is complete, the TX count will equal the RX count. +// +localparam REG_BIST_TX_BYTE_COUNT_LO = 'h40; +localparam REG_BIST_TX_BYTE_COUNT_HI = 'h44; + + +// REG_BIST_RX_BYTE_COUNT (R) +// +// Reports the number of bytes received by the BIST component. This should +// always be read least-significant word first to ensure coherency. Once BIST +// is complete, the TX count will equal the RX count. +// +localparam REG_BIST_RX_BYTE_COUNT_LO = 'h48; +localparam REG_BIST_RX_BYTE_COUNT_HI = 'h4C; + + +// REG_BIST_ERROR_COUNT (R) +// +// Reports the number of words in which the BIST component detected errors. +// This should always be read least-significant word first to ensure coherency. 
+// +localparam REG_BIST_ERROR_COUNT_LO = 'h50; +localparam REG_BIST_ERROR_COUNT_HI = 'h54; + + +// REG_BIST_CYCLE_COUNT (R) +// +// Reports the number of clock cycles that have elapsed while the BIST was +// running. This can be used to calculate throughput. This should always be +// read least-significant word first to ensure coherency. +// +localparam REG_BIST_CYCLE_COUNT_LO = 'h58; +localparam REG_BIST_CYCLE_COUNT_HI = 'h5C; + diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/noc_shell_axi_ram_fifo.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/noc_shell_axi_ram_fifo.v new file mode 100644 index 000000000..fc353595d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/noc_shell_axi_ram_fifo.v @@ -0,0 +1,319 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: noc_shell_axi_ram_fifo +// +// Description: A NoC Shell for the RFNoC AXI RAM FIFO. This NoC Shell +// implements the control port interface but does nothing to the +// data path other than moving it to the requested clock domain. +// + +`define MAX(X,Y) ((X) > (Y) ? 
(X) : (Y)) + + +module noc_shell_axi_ram_fifo #( + parameter [31:0] NOC_ID = 32'h0, + parameter [ 9:0] THIS_PORTID = 10'd0, + parameter CHDR_W = 64, + parameter DATA_W = 64, + parameter [ 5:0] CTRL_FIFO_SIZE = 0, + parameter [ 0:0] CTRLPORT_MST_EN = 1, + parameter [ 0:0] CTRLPORT_SLV_EN = 1, + parameter [ 5:0] NUM_DATA_I = 1, + parameter [ 5:0] NUM_DATA_O = 1, + parameter [ 5:0] MTU = 10, + parameter SYNC_DATA_CLOCKS = 0 +) ( + //--------------------------------------------------------------------------- + // Framework Interface + //--------------------------------------------------------------------------- + + // RFNoC Framework Clocks and Resets + input wire rfnoc_chdr_clk, + output wire rfnoc_chdr_rst, + input wire rfnoc_ctrl_clk, + output wire rfnoc_ctrl_rst, + // RFNoC Backend Interface + input wire [ 511:0] rfnoc_core_config, + output wire [ 511:0] rfnoc_core_status, + // CHDR Input Ports (from framework) + input wire [(CHDR_W*NUM_DATA_I)-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tready, + // CHDR Output Ports (to framework) + output wire [(CHDR_W*NUM_DATA_O)-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tready, + // AXIS-Ctrl Input Port (from framework) + input wire [ 31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + // AXIS-Ctrl Output Port (to framework) + output wire [ 31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + //--------------------------------------------------------------------------- + // Client Control Port Interface + //--------------------------------------------------------------------------- + + // Clock + input wire 
ctrlport_clk, + input wire ctrlport_rst, + // Master + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Client Data Interface + //--------------------------------------------------------------------------- + + // Clock + input wire axis_data_clk, + input wire axis_data_rst, + + // Output data stream (to user logic) + output wire [ (NUM_DATA_I*DATA_W)-1:0] m_axis_tdata, + output wire [(NUM_DATA_I*`MAX(DATA_W/CHDR_W, 1))-1:0] m_axis_tkeep, + output wire [ NUM_DATA_I-1:0] m_axis_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_tready, + + // Input data stream (from user logic) + input wire [ (NUM_DATA_O*DATA_W)-1:0] s_axis_tdata, + input wire [(NUM_DATA_O*`MAX(DATA_W/CHDR_W, 1))-1:0] s_axis_tkeep, + input wire [ NUM_DATA_O-1:0] s_axis_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_tready +); + + //--------------------------------------------------------------------------- + // Backend Interface + 
//--------------------------------------------------------------------------- + wire data_i_flush_en; + wire [31:0] data_i_flush_timeout; + wire [63:0] data_i_flush_active; + wire [63:0] data_i_flush_done; + wire data_o_flush_en; + wire [31:0] data_o_flush_timeout; + wire [63:0] data_o_flush_active; + wire [63:0] data_o_flush_done; + + backend_iface #( + .NOC_ID (NOC_ID), + .NUM_DATA_I (NUM_DATA_I), + .NUM_DATA_O (NUM_DATA_O), + .CTRL_FIFOSIZE (CTRL_FIFO_SIZE), + .MTU (MTU) + ) backend_iface_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst), + .data_i_flush_en (data_i_flush_en), + .data_i_flush_timeout (data_i_flush_timeout), + .data_i_flush_active (data_i_flush_active), + .data_i_flush_done (data_i_flush_done), + .data_o_flush_en (data_o_flush_en), + .data_o_flush_timeout (data_o_flush_timeout), + .data_o_flush_active (data_o_flush_active), + .data_o_flush_done (data_o_flush_done) + ); + + //--------------------------------------------------------------------------- + // Control Path + //--------------------------------------------------------------------------- + + ctrlport_endpoint #( + .THIS_PORTID (THIS_PORTID ), + .SYNC_CLKS (0 ), + .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN), + .AXIS_CTRL_SLV_EN (CTRLPORT_MST_EN), + .SLAVE_FIFO_SIZE (CTRL_FIFO_SIZE ) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .m_ctrlport_req_wr 
(m_ctrlport_req_wr ), + .m_ctrlport_req_rd (m_ctrlport_req_rd ), + .m_ctrlport_req_addr (m_ctrlport_req_addr ), + .m_ctrlport_req_data (m_ctrlport_req_data ), + .m_ctrlport_req_byte_en (m_ctrlport_req_byte_en ), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time ), + .m_ctrlport_req_time (m_ctrlport_req_time ), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack ), + .m_ctrlport_resp_status (m_ctrlport_resp_status ), + .m_ctrlport_resp_data (m_ctrlport_resp_data ), + .s_ctrlport_req_wr (s_ctrlport_req_wr ), + .s_ctrlport_req_rd (s_ctrlport_req_rd ), + .s_ctrlport_req_addr (s_ctrlport_req_addr ), + .s_ctrlport_req_portid (s_ctrlport_req_portid ), + .s_ctrlport_req_rem_epid (s_ctrlport_req_rem_epid ), + .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .s_ctrlport_req_data (s_ctrlport_req_data ), + .s_ctrlport_req_byte_en (s_ctrlport_req_byte_en ), + .s_ctrlport_req_has_time (s_ctrlport_req_has_time ), + .s_ctrlport_req_time (s_ctrlport_req_time ), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack ), + .s_ctrlport_resp_status (s_ctrlport_resp_status ), + .s_ctrlport_resp_data (s_ctrlport_resp_data ) + ); + + //--------------------------------------------------------------------------- + // Data Path + //--------------------------------------------------------------------------- + + // Set WORD_W to the smaller of DATA_W and CHDR_W. This will be our common + // word size between the CHDR and user data ports. + localparam WORD_W = DATA_W < CHDR_W ? 
DATA_W : CHDR_W; + localparam KEEP_W = `MAX(DATA_W/CHDR_W, 1); + + genvar i; + + for (i = 0; i < NUM_DATA_I; i = i + 1) begin : gen_in + wire [CHDR_W-1:0] temp_in_tdata; + wire temp_in_tlast; + wire temp_in_tvalid; + wire temp_in_tready; + + axis_packet_flush #( + .WIDTH (CHDR_W), + .FLUSH_PARTIAL_PKTS (0), + .TIMEOUT_W (32), + .PIPELINE ("IN") + ) in_packet_flush_i ( + .clk (rfnoc_chdr_clk), + .reset (rfnoc_chdr_rst), + .enable (data_i_flush_en), + .timeout (data_i_flush_timeout), + .flushing (data_i_flush_active[i]), + .done (data_i_flush_done[i]), + .s_axis_tdata (s_rfnoc_chdr_tdata[i*CHDR_W +: CHDR_W]), + .s_axis_tlast (s_rfnoc_chdr_tlast[i]), + .s_axis_tvalid (s_rfnoc_chdr_tvalid[i]), + .s_axis_tready (s_rfnoc_chdr_tready[i]), + .m_axis_tdata (temp_in_tdata), + .m_axis_tlast (temp_in_tlast), + .m_axis_tvalid (temp_in_tvalid), + .m_axis_tready (temp_in_tready) + ); + + axis_width_conv #( + .WORD_W (WORD_W), + .IN_WORDS (CHDR_W/WORD_W), + .OUT_WORDS (DATA_W/WORD_W), + .SYNC_CLKS (SYNC_DATA_CLOCKS), + .PIPELINE ("NONE") + ) in_width_conv_i ( + .s_axis_aclk (rfnoc_chdr_clk), + .s_axis_rst (rfnoc_chdr_rst), + .s_axis_tdata (temp_in_tdata), + .s_axis_tkeep ({CHDR_W/WORD_W{1'b1}}), + .s_axis_tlast (temp_in_tlast), + .s_axis_tvalid (temp_in_tvalid), + .s_axis_tready (temp_in_tready), + .m_axis_aclk (axis_data_clk), + .m_axis_rst (axis_data_rst), + .m_axis_tdata (m_axis_tdata[i*DATA_W +: DATA_W]), + .m_axis_tkeep (m_axis_tkeep[i*KEEP_W +: KEEP_W]), + .m_axis_tlast (m_axis_tlast[i]), + .m_axis_tvalid (m_axis_tvalid[i]), + .m_axis_tready (m_axis_tready[i]) + ); + end + + + for (i = 0; i < NUM_DATA_O; i = i + 1) begin : gen_out + wire [ CHDR_W-1:0] temp_out_tdata; + wire [CHDR_W/WORD_W-1:0] temp_out_tkeep; + wire temp_out_tlast; + wire temp_out_tvalid; + wire temp_out_tready; + + axis_width_conv #( + .WORD_W (WORD_W), + .IN_WORDS (DATA_W/WORD_W), + .OUT_WORDS (CHDR_W/WORD_W), + .SYNC_CLKS (SYNC_DATA_CLOCKS), + .PIPELINE ("NONE") + ) out_width_conv_i ( + .s_axis_aclk 
(axis_data_clk), + .s_axis_rst (axis_data_rst), + .s_axis_tdata (s_axis_tdata[i*DATA_W +: DATA_W]), + .s_axis_tkeep (s_axis_tkeep[i*KEEP_W +: KEEP_W]), + .s_axis_tlast (s_axis_tlast[i]), + .s_axis_tvalid (s_axis_tvalid[i]), + .s_axis_tready (s_axis_tready[i]), + .m_axis_aclk (rfnoc_chdr_clk), + .m_axis_rst (rfnoc_chdr_rst), + .m_axis_tdata (temp_out_tdata), + .m_axis_tkeep (), + .m_axis_tlast (temp_out_tlast), + .m_axis_tvalid (temp_out_tvalid), + .m_axis_tready (temp_out_tready) + ); + + axis_packet_flush #( + .WIDTH (CHDR_W), + .FLUSH_PARTIAL_PKTS (0), + .TIMEOUT_W (32), + .PIPELINE ("OUT") + ) out_packet_flush_i ( + .clk (rfnoc_chdr_clk), + .reset (rfnoc_chdr_rst), + .enable (data_o_flush_en), + .timeout (data_o_flush_timeout), + .flushing (data_o_flush_active[i]), + .done (data_o_flush_done[i]), + .s_axis_tdata (temp_out_tdata), + .s_axis_tlast (temp_out_tlast), + .s_axis_tvalid (temp_out_tvalid), + .s_axis_tready (temp_out_tready), + .m_axis_tdata (m_rfnoc_chdr_tdata[i*CHDR_W +: CHDR_W]), + .m_axis_tlast (m_rfnoc_chdr_tlast[i]), + .m_axis_tvalid (m_rfnoc_chdr_tvalid[i]), + .m_axis_tready (m_rfnoc_chdr_tready[i]) + ); + + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo.v new file mode 100644 index 000000000..04d942ce0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo.v @@ -0,0 +1,485 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_axi_ram_fifo +// +// Description: +// +// Implements a FIFO using an AXI memory-mapped interface to an external +// memory. 
+// +// Parameters: +// +// THIS_PORTID : Control crossbar port to which this block is connected +// +// CHDR_W : CHDR AXI-Stream data bus width +// +// NUM_PORTS : Number of independent FIFOs to support, all sharing the +// same memory. +// +// MTU : Maximum transfer unit (maximum packet size) to support, +// in CHDR_W-sized words. +// +// MEM_DATA_W : Width of the data bus to use for the AXI memory-mapped +// interface. CHDR_W must be a multiple of MEM_DATA_W, or +// vice versa. +// +// MEM_ADDR_W : Width of the byte address to use for RAM addressing. This +// effectively sets the maximum combined size of all FIFOs. +// This must be less than or equal to AWIDTH. +// +// AWIDTH : Width of the address bus for the AXI memory-mapped +// interface. This must be at least as big as MEM_ADDR_W. +// +// FIFO_ADDR_BASE : Default base byte address of each FIFO. When NUM_PORTS > +// 1, this should be the concatenation of all the FIFO base +// addresses. These values can be reconfigured by software. +// +// FIFO_ADDR_MASK : Default byte address mask used by each FIFO. It must be +// a power of 2 minus 1 (all ones in the low-order bits). +// The size of the FIFO in bytes will be this value plus +// one. These values can be reconfigured by software. +// +// BURST_TIMEOUT : Default number of memory clock cycles to wait for new +// data before performing a short, sub-optimal burst. One +// value per FIFO. +// +// IN_FIFO_SIZE : Size of the input buffer. This is used to mitigate the +// effects of memory write latency, which can be significant +// when the external memory is DRAM. +// +// OUT_FIFO_SIZE : Size of the output buffer. This is used to mitigate the +// effects of memory read latency, which can be significant +// when the external memory is DRAM. +// +// BIST : Includes BIST logic when true. +// +// MEM_CLK_RATE : Frequency of mem_clk in Hz. This is used by BIST for +// throughput calculation. 
+// + +module rfnoc_block_axi_ram_fifo #( + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter NUM_PORTS = 1, + parameter MTU = 10, + parameter MEM_DATA_W = CHDR_W, + parameter MEM_ADDR_W = 32, + parameter AWIDTH = 32, + parameter [NUM_PORTS*MEM_ADDR_W-1:0] FIFO_ADDR_BASE = {NUM_PORTS{ {MEM_ADDR_W{1'b0}} }}, + parameter [NUM_PORTS*MEM_ADDR_W-1:0] FIFO_ADDR_MASK = {NUM_PORTS{ {(MEM_ADDR_W-$clog2(NUM_PORTS)){1'b1}} }}, + parameter [ NUM_PORTS*32-1:0] BURST_TIMEOUT = {NUM_PORTS{ 32'd256 }}, + parameter IN_FIFO_SIZE = 11, + parameter OUT_FIFO_SIZE = 11, + parameter BIST = 1, + parameter MEM_CLK_RATE = 200e6 +) ( + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + + // CHDR inputs from framework + input wire [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire 
m_rfnoc_ctrl_tready, + + + //--------------------------------------------------------------------------- + // AXI Memory Mapped Interface + //--------------------------------------------------------------------------- + + // AXI Interface Clock and Reset + input wire mem_clk, + input wire axi_rst, + + // AXI Write Address Channel + output wire [ NUM_PORTS*1-1:0] m_axi_awid, // Write address ID. This signal is the identification tag for the write address signals + output wire [ NUM_PORTS*AWIDTH-1:0] m_axi_awaddr, // Write address. The write address gives the address of the first transfer in a write burst + output wire [ NUM_PORTS*8-1:0] m_axi_awlen, // Burst length. The burst length gives the exact number of transfers in a burst. + output wire [ NUM_PORTS*3-1:0] m_axi_awsize, // Burst size. This signal indicates the size of each transfer in the burst. + output wire [ NUM_PORTS*2-1:0] m_axi_awburst, // Burst type. The burst type and the size information, determine how the address is calculated + output wire [ NUM_PORTS*1-1:0] m_axi_awlock, // Lock type. Provides additional information about the atomic characteristics of the transfer. + output wire [ NUM_PORTS*4-1:0] m_axi_awcache, // Memory type. This signal indicates how transactions are required to progress + output wire [ NUM_PORTS*3-1:0] m_axi_awprot, // Protection type. This signal indicates the privilege and security level of the transaction + output wire [ NUM_PORTS*4-1:0] m_axi_awqos, // Quality of Service, QoS. The QoS identifier sent for each write transaction + output wire [ NUM_PORTS*4-1:0] m_axi_awregion, // Region identifier. Permits a single physical interface on a slave to be re-used. + output wire [ NUM_PORTS*1-1:0] m_axi_awuser, // User signal. Optional User-defined signal in the write address channel. + output wire [ NUM_PORTS*1-1:0] m_axi_awvalid, // Write address valid. 
This signal indicates that the channel is signaling valid write addr + input wire [ NUM_PORTS*1-1:0] m_axi_awready, // Write address ready. This signal indicates that the slave is ready to accept an address + + // AXI Write Data Channel + output wire [ NUM_PORTS*MEM_DATA_W-1:0] m_axi_wdata, // Write data + output wire [NUM_PORTS*MEM_DATA_W/8-1:0] m_axi_wstrb, // Write strobes. This signal indicates which byte lanes hold valid data. + output wire [ NUM_PORTS*1-1:0] m_axi_wlast, // Write last. This signal indicates the last transfer in a write burst + output wire [ NUM_PORTS*1-1:0] m_axi_wuser, // User signal. Optional User-defined signal in the write data channel. + output wire [ NUM_PORTS*1-1:0] m_axi_wvalid, // Write valid. This signal indicates that valid write data and strobes are available. + input wire [ NUM_PORTS*1-1:0] m_axi_wready, // Write ready. This signal indicates that the slave can accept the write data. + + // AXI Write Response Channel + input wire [ NUM_PORTS*1-1:0] m_axi_bid, // Response ID tag. This signal is the ID tag of the write response. + input wire [ NUM_PORTS*2-1:0] m_axi_bresp, // Write response. This signal indicates the status of the write transaction. + input wire [ NUM_PORTS*1-1:0] m_axi_buser, // User signal. Optional User-defined signal in the write response channel. + input wire [ NUM_PORTS*1-1:0] m_axi_bvalid, // Write response valid. This signal indicates that the channel is signaling a valid response + output wire [ NUM_PORTS*1-1:0] m_axi_bready, // Response ready. This signal indicates that the master can accept a write response + + // AXI Read Address Channel + output wire [ NUM_PORTS*1-1:0] m_axi_arid, // Read address ID. This signal is the identification tag for the read address group of signals + output wire [ NUM_PORTS*AWIDTH-1:0] m_axi_araddr, // Read address. The read address gives the address of the first transfer in a read burst + output wire [ NUM_PORTS*8-1:0] m_axi_arlen, // Burst length. 
This signal indicates the exact number of transfers in a burst. + output wire [ NUM_PORTS*3-1:0] m_axi_arsize, // Burst size. This signal indicates the size of each transfer in the burst. + output wire [ NUM_PORTS*2-1:0] m_axi_arburst, // Burst type. The burst type and the size information determine how the address for each transfer + output wire [ NUM_PORTS*1-1:0] m_axi_arlock, // Lock type. This signal provides additional information about the atomic characteristics + output wire [ NUM_PORTS*4-1:0] m_axi_arcache, // Memory type. This signal indicates how transactions are required to progress + output wire [ NUM_PORTS*3-1:0] m_axi_arprot, // Protection type. This signal indicates the privilege and security level of the transaction + output wire [ NUM_PORTS*4-1:0] m_axi_arqos, // Quality of Service, QoS. QoS identifier sent for each read transaction. + output wire [ NUM_PORTS*4-1:0] m_axi_arregion, // Region identifier. Permits a single physical interface on a slave to be re-used + output wire [ NUM_PORTS*1-1:0] m_axi_aruser, // User signal. Optional User-defined signal in the read address channel. + output wire [ NUM_PORTS*1-1:0] m_axi_arvalid, // Read address valid. This signal indicates that the channel is signaling valid read addr + input wire [ NUM_PORTS*1-1:0] m_axi_arready, // Read address ready. This signal indicates that the slave is ready to accept an address + + // AXI Read Data Channel + input wire [ NUM_PORTS*1-1:0] m_axi_rid, // Read ID tag. This signal is the identification tag for the read data group of signals + input wire [NUM_PORTS*MEM_DATA_W-1:0] m_axi_rdata, // Read data. + input wire [ NUM_PORTS*2-1:0] m_axi_rresp, // Read response. This signal indicates the status of the read transfer + input wire [ NUM_PORTS*1-1:0] m_axi_rlast, // Read last. This signal indicates the last transfer in a read burst. + input wire [ NUM_PORTS*1-1:0] m_axi_ruser, // User signal. Optional User-defined signal in the read data channel. 
+ input wire [ NUM_PORTS*1-1:0] m_axi_rvalid, // Read valid. This signal indicates that the channel is signaling the required read data. + output wire [ NUM_PORTS*1-1:0] m_axi_rready // Read ready. This signal indicates that the master can accept the read data and response +); + + `include "axi_ram_fifo_regs.vh" + + localparam NOC_ID = 'hF1F0_0000; + + // If the memory width is larger than the CHDR width, then we need to use + // tkeep to track which CHDR words are valid as they go through the FIFO. + // Calculate the TKEEP width here. Set to 1 if it's not needed. + localparam KEEP_W = (MEM_DATA_W/CHDR_W) > 1 ? (MEM_DATA_W/CHDR_W) : 1; + + + //--------------------------------------------------------------------------- + // Parameter Checks + //--------------------------------------------------------------------------- + + if (CHDR_W % MEM_DATA_W != 0 && MEM_DATA_W % CHDR_W != 0) + CHDR_W_must_be_a_multiple_of_MEM_DATA_W_or_vice_versa(); + + if (MEM_ADDR_W > AWIDTH) + MEM_ADDR_W_must_be_greater_than_AWIDTH(); + + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + wire rfnoc_chdr_rst; + + wire ctrlport_req_wr; + wire ctrlport_req_rd; + wire [19:0] ctrlport_req_addr; + wire [31:0] ctrlport_req_data; + wire ctrlport_resp_ack; + wire [31:0] ctrlport_resp_data; + + wire [NUM_PORTS*MEM_DATA_W-1:0] m_axis_data_tdata; + wire [ NUM_PORTS*KEEP_W-1:0] m_axis_data_tkeep; + wire [ NUM_PORTS-1:0] m_axis_data_tlast; + wire [ NUM_PORTS-1:0] m_axis_data_tvalid; + wire [ NUM_PORTS-1:0] m_axis_data_tready; + + wire [NUM_PORTS*MEM_DATA_W-1:0] s_axis_data_tdata; + wire [ NUM_PORTS*KEEP_W-1:0] s_axis_data_tkeep; + wire [ NUM_PORTS-1:0] s_axis_data_tlast; + wire [ NUM_PORTS-1:0] s_axis_data_tvalid; + wire [ NUM_PORTS-1:0] s_axis_data_tready; + + noc_shell_axi_ram_fifo #( + .NOC_ID (NOC_ID), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .DATA_W (MEM_DATA_W), 
+ .CTRL_FIFO_SIZE (5), + .CTRLPORT_MST_EN (1), + .CTRLPORT_SLV_EN (0), + .NUM_DATA_I (NUM_PORTS), + .NUM_DATA_O (NUM_PORTS), + .MTU (MTU), + .SYNC_DATA_CLOCKS (0) + ) noc_shell_axi_ram_fifo_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_ctrl_rst (), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready), + .ctrlport_clk (mem_clk), + .ctrlport_rst (axi_rst), + .m_ctrlport_req_wr (ctrlport_req_wr), + .m_ctrlport_req_rd (ctrlport_req_rd), + .m_ctrlport_req_addr (ctrlport_req_addr), + .m_ctrlport_req_data (ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (ctrlport_resp_ack), + .m_ctrlport_resp_status (2'b0), + .m_ctrlport_resp_data (ctrlport_resp_data), + .s_ctrlport_req_wr (1'b0), + .s_ctrlport_req_rd (1'b0), + .s_ctrlport_req_addr (20'b0), + .s_ctrlport_req_portid (10'b0), + .s_ctrlport_req_rem_epid (16'b0), + .s_ctrlport_req_rem_portid (10'b0), + .s_ctrlport_req_data (32'b0), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .axis_data_clk 
(mem_clk), + .axis_data_rst (axi_rst), + .m_axis_tdata (m_axis_data_tdata), + .m_axis_tkeep (m_axis_data_tkeep), + .m_axis_tlast (m_axis_data_tlast), + .m_axis_tvalid (m_axis_data_tvalid), + .m_axis_tready (m_axis_data_tready), + .s_axis_tdata (s_axis_data_tdata), + .s_axis_tkeep (s_axis_data_tkeep), + .s_axis_tlast (s_axis_data_tlast), + .s_axis_tvalid (s_axis_data_tvalid), + .s_axis_tready (s_axis_data_tready) + ); + + wire rfnoc_chdr_rst_mem_clk; + reg mem_rst_block; + + // Cross the CHDR reset to the mem_clk domain + pulse_synchronizer #( + .MODE ("POSEDGE") + ) ctrl_rst_sync_i ( + .clk_a (rfnoc_chdr_clk), + .rst_a (1'b0), + .pulse_a (rfnoc_chdr_rst), + .busy_a (), + .clk_b (mem_clk), + .pulse_b (rfnoc_chdr_rst_mem_clk) + ); + + // Combine the resets in a glitch-free manner + always @(posedge mem_clk) begin + mem_rst_block <= axi_rst | rfnoc_chdr_rst_mem_clk; + end + + + //--------------------------------------------------------------------------- + // CTRL Port Splitter + //--------------------------------------------------------------------------- + + wire [ NUM_PORTS-1:0] m_ctrlport_req_wr; + wire [ NUM_PORTS-1:0] m_ctrlport_req_rd; + wire [20*NUM_PORTS-1:0] m_ctrlport_req_addr; + wire [32*NUM_PORTS-1:0] m_ctrlport_req_data; + wire [ NUM_PORTS-1:0] m_ctrlport_resp_ack; + wire [32*NUM_PORTS-1:0] m_ctrlport_resp_data; + + ctrlport_decoder #( + .NUM_SLAVES (NUM_PORTS), + .BASE_ADDR (0), + .SLAVE_ADDR_W (RAM_FIFO_ADDR_W) + ) ctrlport_splitter_i ( + .ctrlport_clk (mem_clk), + .ctrlport_rst (mem_rst_block), + .s_ctrlport_req_wr (ctrlport_req_wr), + .s_ctrlport_req_rd (ctrlport_req_rd), + .s_ctrlport_req_addr (ctrlport_req_addr), + .s_ctrlport_req_data (ctrlport_req_data), + .s_ctrlport_req_byte_en (4'b1111), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (ctrlport_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (ctrlport_resp_data), + .m_ctrlport_req_wr (m_ctrlport_req_wr), + .m_ctrlport_req_rd 
(m_ctrlport_req_rd), + .m_ctrlport_req_addr (m_ctrlport_req_addr), + .m_ctrlport_req_data (m_ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack), + .m_ctrlport_resp_status ({NUM_PORTS*2{1'b0}}), + .m_ctrlport_resp_data (m_ctrlport_resp_data) + ); + + + //--------------------------------------------------------------------------- + // FIFO Instances + //--------------------------------------------------------------------------- + + genvar i; + for (i = 0; i < NUM_PORTS; i = i + 1) begin : gen_ram_fifos + + wire [MEM_ADDR_W-1:0] m_axi_awaddr_int; + wire [MEM_ADDR_W-1:0] m_axi_araddr_int; + + // Resize the addresses from MEM_ADDR_W to AWIDTH + assign m_axi_awaddr[(AWIDTH*(i+1))-1:AWIDTH*i] = m_axi_awaddr_int; + assign m_axi_araddr[(AWIDTH*(i+1))-1:AWIDTH*i] = m_axi_araddr_int; + + axi_ram_fifo #( + .MEM_ADDR_W (MEM_ADDR_W), + .MEM_DATA_W (MEM_DATA_W), + .KEEP_W (KEEP_W), + .FIFO_ADDR_BASE (FIFO_ADDR_BASE[MEM_ADDR_W*i +: MEM_ADDR_W]), + .FIFO_ADDR_MASK (FIFO_ADDR_MASK[MEM_ADDR_W*i +: MEM_ADDR_W]), + .BURST_TIMEOUT (BURST_TIMEOUT[32*i +: 32]), + .BIST (BIST), + .CLK_RATE (MEM_CLK_RATE), + .IN_FIFO_SIZE (IN_FIFO_SIZE), + .OUT_FIFO_SIZE (OUT_FIFO_SIZE) + ) axi_ram_fifo_i ( + + .clk(mem_clk), + .rst(mem_rst_block), + + //----------------------------------------------------------------------- + // Control Port + //----------------------------------------------------------------------- + + .s_ctrlport_req_wr (m_ctrlport_req_wr[i]), + .s_ctrlport_req_rd (m_ctrlport_req_rd[i]), + .s_ctrlport_req_addr (m_ctrlport_req_addr[20*i +: 20]), + .s_ctrlport_req_data (m_ctrlport_req_data[32*i +: 32]), + .s_ctrlport_resp_ack (m_ctrlport_resp_ack[i]), + .s_ctrlport_resp_data (m_ctrlport_resp_data[32*i +: 32]), + + //----------------------------------------------------------------------- + // AXI-Stream FIFO Interface + 
//----------------------------------------------------------------------- + + // AXI-Stream Input + .s_tdata (m_axis_data_tdata[MEM_DATA_W*i +: MEM_DATA_W]), + .s_tkeep (m_axis_data_tkeep[KEEP_W*i +: KEEP_W]), + .s_tlast (m_axis_data_tlast[i]), + .s_tvalid (m_axis_data_tvalid[i]), + .s_tready (m_axis_data_tready[i]), + // + // AXI-Stream Output + .m_tdata (s_axis_data_tdata[MEM_DATA_W*i +: MEM_DATA_W]), + .m_tkeep (s_axis_data_tkeep[KEEP_W*i +: KEEP_W]), + .m_tlast (s_axis_data_tlast[i]), + .m_tvalid (s_axis_data_tvalid[i]), + .m_tready (s_axis_data_tready[i]), + + //----------------------------------------------------------------------- + // AXI4 Memory Interface + //----------------------------------------------------------------------- + + // AXI Write address channel + .m_axi_awid (m_axi_awid[i]), + .m_axi_awaddr (m_axi_awaddr_int), + .m_axi_awlen (m_axi_awlen[(8*(i+1))-1:8*i]), + .m_axi_awsize (m_axi_awsize[(3*(i+1))-1:3*i]), + .m_axi_awburst (m_axi_awburst[(2*(i+1))-1:2*i]), + .m_axi_awlock (m_axi_awlock[i]), + .m_axi_awcache (m_axi_awcache[(4*(i+1))-1:4*i]), + .m_axi_awprot (m_axi_awprot[(3*(i+1))-1:3*i]), + .m_axi_awqos (m_axi_awqos[(4*(i+1))-1:4*i]), + .m_axi_awregion (m_axi_awregion[(4*(i+1))-1:4*i]), + .m_axi_awuser (m_axi_awuser[i]), + .m_axi_awvalid (m_axi_awvalid[i]), + .m_axi_awready (m_axi_awready[i]), + // + // AXI Write data channel. 
+ .m_axi_wdata (m_axi_wdata[(MEM_DATA_W*(i+1))-1:MEM_DATA_W*i]), + .m_axi_wstrb (m_axi_wstrb[((MEM_DATA_W/8)*(i+1))-1:(MEM_DATA_W/8)*i]), + .m_axi_wlast (m_axi_wlast[i]), + .m_axi_wuser (m_axi_wuser[i]), + .m_axi_wvalid (m_axi_wvalid[i]), + .m_axi_wready (m_axi_wready[i]), + // + // AXI Write response channel signals + .m_axi_bid (m_axi_bid[i]), + .m_axi_bresp (m_axi_bresp[(2*(i+1))-1:2*i]), + .m_axi_buser (m_axi_buser[i]), + .m_axi_bvalid (m_axi_bvalid[i]), + .m_axi_bready (m_axi_bready[i]), + // + // AXI Read address channel + .m_axi_arid (m_axi_arid[i]), + .m_axi_araddr (m_axi_araddr_int), + .m_axi_arlen (m_axi_arlen[(8*(i+1))-1:8*i]), + .m_axi_arsize (m_axi_arsize[(3*(i+1))-1:3*i]), + .m_axi_arburst (m_axi_arburst[(2*(i+1))-1:2*i]), + .m_axi_arlock (m_axi_arlock[i]), + .m_axi_arcache (m_axi_arcache[(4*(i+1))-1:4*i]), + .m_axi_arprot (m_axi_arprot[(3*(i+1))-1:3*i]), + .m_axi_arqos (m_axi_arqos[(4*(i+1))-1:4*i]), + .m_axi_arregion (m_axi_arregion[(4*(i+1))-1:4*i]), + .m_axi_aruser (m_axi_aruser[i]), + .m_axi_arvalid (m_axi_arvalid[i]), + .m_axi_arready (m_axi_arready[i]), + // + // AXI Read data channel + .m_axi_rid (m_axi_rid[i]), + .m_axi_rdata (m_axi_rdata[(MEM_DATA_W*(i+1))-1:MEM_DATA_W*i]), + .m_axi_rresp (m_axi_rresp[(2*(i+1))-1:2*i]), + .m_axi_rlast (m_axi_rlast[i]), + .m_axi_ruser (m_axi_ruser[i]), + .m_axi_rvalid (m_axi_rvalid[i]), + .m_axi_rready (m_axi_rready[i]) + ); + + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_all_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_all_tb.sv new file mode 100644 index 000000000..575c600f9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_all_tb.sv @@ -0,0 +1,70 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_axi_ram_fifo_all_tb +// +// Description: +// +// This is the 
testbench for rfnoc_block_axi_ram_fifo that instantiates
// several variations of rfnoc_block_axi_ram_fifo_tb to test different
// configurations.
//


module rfnoc_block_axi_ram_fifo_all_tb;

  timeunit 1ns;
  timeprecision 1ps;

  import PkgTestExec::*;


  //---------------------------------------------------------------------------
  // Test Definitions
  //---------------------------------------------------------------------------

  // One entry per testbench instance. Every field is forwarded unchanged to
  // the identically named parameter of rfnoc_block_axi_ram_fifo_tb.
  typedef struct {
    int CHDR_W;
    int NUM_PORTS;
    int MEM_DATA_W;
    int MEM_ADDR_W;
    int FIFO_ADDR_W;
    int IN_FIFO_SIZE;
    int OUT_FIFO_SIZE;
    bit OVERFLOW;
    bit BIST;
  } test_config_t;

  // Number of configurations; must match the number of rows in the table below
  localparam NUM_TESTS = 4;

  // Covers CHDR_W equal to, smaller than, and larger than MEM_DATA_W, one
  // two-port configuration, and one configuration with BIST disabled.
  localparam test_config_t test[NUM_TESTS] = '{
    '{CHDR_W:  64, NUM_PORTS: 2, MEM_DATA_W:  64, MEM_ADDR_W: 13, FIFO_ADDR_W: 12, IN_FIFO_SIZE:  9, OUT_FIFO_SIZE:  9, OVERFLOW: 1, BIST: 1 },
    '{CHDR_W:  64, NUM_PORTS: 1, MEM_DATA_W: 128, MEM_ADDR_W: 14, FIFO_ADDR_W: 13, IN_FIFO_SIZE:  9, OUT_FIFO_SIZE:  9, OVERFLOW: 1, BIST: 1 },
    '{CHDR_W: 128, NUM_PORTS: 1, MEM_DATA_W:  64, MEM_ADDR_W: 13, FIFO_ADDR_W: 12, IN_FIFO_SIZE:  9, OUT_FIFO_SIZE: 10, OVERFLOW: 0, BIST: 1 },
    '{CHDR_W: 128, NUM_PORTS: 1, MEM_DATA_W: 128, MEM_ADDR_W: 16, FIFO_ADDR_W: 14, IN_FIFO_SIZE: 12, OUT_FIFO_SIZE: 12, OVERFLOW: 0, BIST: 0 }
  };


  //---------------------------------------------------------------------------
  // DUT Instances
  //---------------------------------------------------------------------------

  // Instantiate one copy of the single-configuration testbench per table row.
  // The instance is named after the module it instantiates (it previously
  // carried a label left over from the radio block's testbench).
  genvar i;
  for (i = 0; i < NUM_TESTS; i++) begin : gen_test_config
    rfnoc_block_axi_ram_fifo_tb #(
      .CHDR_W        (test[i].CHDR_W),
      .NUM_PORTS     (test[i].NUM_PORTS),
      .MEM_DATA_W    (test[i].MEM_DATA_W),
      .MEM_ADDR_W    (test[i].MEM_ADDR_W),
      .FIFO_ADDR_W   (test[i].FIFO_ADDR_W),
      .IN_FIFO_SIZE  (test[i].IN_FIFO_SIZE),
      .OUT_FIFO_SIZE (test[i].OUT_FIFO_SIZE),
      .OVERFLOW      (test[i].OVERFLOW),
      .BIST          (test[i].BIST)
    ) rfnoc_block_axi_ram_fifo_tb_i ();
  end : gen_test_config


endmodule : rfnoc_block_axi_ram_fifo_all_tb
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_tb.sv new file mode 100644 index 000000000..49e184ce0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/rfnoc_block_axi_ram_fifo_tb.sv @@ -0,0 +1,1114 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_axi_ram_fifo_tb +// +// Description: Testbench for rfnoc_block_axi_ram_fifo +// + + +module rfnoc_block_axi_ram_fifo_tb #( + parameter int CHDR_W = 64, + parameter int NUM_PORTS = 2, + parameter int MEM_DATA_W = 64, + parameter int MEM_ADDR_W = 13, + parameter int FIFO_ADDR_W = 12, + parameter int IN_FIFO_SIZE = 9, + parameter int OUT_FIFO_SIZE = 9, + parameter bit OVERFLOW = 1, + parameter bit BIST = 1 +); + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + + `include "axi_ram_fifo_regs.vh" + + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Simulation parameters + localparam int USE_RANDOM = 1; // Use random simulation data (not sequential) + localparam real CHDR_CLK_PER = 5.333333; // CHDR clock rate + localparam real CTRL_CLK_PER = 10.0; // CTRL clock rate + localparam real MEM_CLK_PER = 3.333333; // Memory clock rate + localparam int SPP = 256; // Samples per packet + localparam int PKT_SIZE_BYTES = SPP*4; // Bytes per packet + localparam int STALL_PROB = 25; // BFM stall probability + + // Block configuration + localparam int THIS_PORTID = 'h123; + localparam int MTU = 12; + localparam int NUM_HB = 3; + localparam int CIC_MAX_DECIM = 255; + localparam int BURST_TIMEOUT 
= 64;
  localparam int MEM_CLK_RATE   = int'(1.0e9/MEM_CLK_PER); // Frequency in Hz
  localparam int AWIDTH         = MEM_ADDR_W+1;

  // Put FIFO 0 at the bottom of the memory and FIFO 1 immediately above it.
  localparam bit [MEM_ADDR_W-1:0] FIFO_ADDR_BASE_0 = 0;
  localparam bit [MEM_ADDR_W-1:0] FIFO_ADDR_BASE_1 = 2**FIFO_ADDR_W;
  localparam bit [MEM_ADDR_W-1:0] FIFO_ADDR_MASK   = 2**FIFO_ADDR_W-1;


  //---------------------------------------------------------------------------
  // Clocks
  //---------------------------------------------------------------------------

  bit rfnoc_chdr_clk;
  bit rfnoc_ctrl_clk;
  bit mem_clk, mem_rst;

  // Don't start the clocks automatically (AUTOSTART=0), since we expect
  // multiple instances of this testbench to run in sequence. They will be
  // started before the first test.
  sim_clock_gen #(.PERIOD(CHDR_CLK_PER), .AUTOSTART(0))
    rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst());
  sim_clock_gen #(.PERIOD(CTRL_CLK_PER), .AUTOSTART(0))
    rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst());
  // The memory clock generator also supplies mem_rst. Its override must use
  // the same AUTOSTART parameter name as the two generators above (it was
  // previously misspelled, so the override did not name that parameter).
  sim_clock_gen #(.PERIOD(MEM_CLK_PER), .AUTOSTART(0))
    mem_clk_gen (.clk(mem_clk), .rst(mem_rst));


  //---------------------------------------------------------------------------
  // Bus Functional Models
  //---------------------------------------------------------------------------

  // Backend interface plus control and per-port CHDR AXI-Stream interfaces.
  // The "m_" interfaces drive the DUT's slave ports and the "s_" interfaces
  // receive from the DUT's master ports.
  RfnocBackendIf        backend            (rfnoc_chdr_clk, rfnoc_ctrl_clk);
  AxiStreamIf #(32)     m_ctrl             (rfnoc_ctrl_clk, 1'b0);
  AxiStreamIf #(32)     s_ctrl             (rfnoc_ctrl_clk, 1'b0);
  AxiStreamIf #(CHDR_W) m_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0);
  AxiStreamIf #(CHDR_W) s_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0);

  // Bus functional model for a software block controller
  RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl =
    new(backend, m_ctrl, s_ctrl);

  // Connect block controller to BFMs
  for (genvar i = 0; i < NUM_PORTS; i++) begin : gen_bfm_connections
    initial begin
      blk_ctrl.connect_master_data_port(i, m_chdr[i], PKT_SIZE_BYTES);
blk_ctrl.connect_slave_data_port(i, s_chdr[i]); + blk_ctrl.set_master_stall_prob(i, STALL_PROB); + blk_ctrl.set_slave_stall_prob(i, STALL_PROB); + end + end + + + //--------------------------------------------------------------------------- + // AXI Memory Model + //--------------------------------------------------------------------------- + + // AXI Write Address Channel + wire [ NUM_PORTS*1-1:0] m_axi_awid; + wire [ NUM_PORTS*AWIDTH-1:0] m_axi_awaddr; + wire [ NUM_PORTS*8-1:0] m_axi_awlen; + wire [ NUM_PORTS*3-1:0] m_axi_awsize; + wire [ NUM_PORTS*2-1:0] m_axi_awburst; + wire [ NUM_PORTS*1-1:0] m_axi_awlock; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_awcache; // Unused master output + wire [ NUM_PORTS*3-1:0] m_axi_awprot; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_awqos; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_awregion; // Unused master output + wire [ NUM_PORTS*1-1:0] m_axi_awuser; // Unused master output + wire [ NUM_PORTS*1-1:0] m_axi_awvalid; + wire [ NUM_PORTS*1-1:0] m_axi_awready; + // AXI Write Data Channel + wire [ NUM_PORTS*MEM_DATA_W-1:0] m_axi_wdata; + wire [NUM_PORTS*MEM_DATA_W/8-1:0] m_axi_wstrb; + wire [ NUM_PORTS*1-1:0] m_axi_wlast; + wire [ NUM_PORTS*1-1:0] m_axi_wuser; // Unused master output + wire [ NUM_PORTS*1-1:0] m_axi_wvalid; + wire [ NUM_PORTS*1-1:0] m_axi_wready; + // AXI Write Response Channel + wire [ NUM_PORTS*1-1:0] m_axi_bid; + wire [ NUM_PORTS*2-1:0] m_axi_bresp; + wire [ NUM_PORTS*1-1:0] m_axi_buser; // Unused master input + wire [ NUM_PORTS*1-1:0] m_axi_bvalid; + wire [ NUM_PORTS*1-1:0] m_axi_bready; + // AXI Read Address Channel + wire [ NUM_PORTS*1-1:0] m_axi_arid; + wire [ NUM_PORTS*AWIDTH-1:0] m_axi_araddr; + wire [ NUM_PORTS*8-1:0] m_axi_arlen; + wire [ NUM_PORTS*3-1:0] m_axi_arsize; + wire [ NUM_PORTS*2-1:0] m_axi_arburst; + wire [ NUM_PORTS*1-1:0] m_axi_arlock; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_arcache; // Unused master output + wire [ NUM_PORTS*3-1:0] 
m_axi_arprot; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_arqos; // Unused master output + wire [ NUM_PORTS*4-1:0] m_axi_arregion; // Unused master output + wire [ NUM_PORTS*1-1:0] m_axi_aruser; // Unused master output + wire [ NUM_PORTS*1-1:0] m_axi_arvalid; + wire [ NUM_PORTS*1-1:0] m_axi_arready; + // AXI Read Data Channel + wire [ NUM_PORTS*1-1:0] m_axi_rid; + wire [NUM_PORTS*MEM_DATA_W-1:0] m_axi_rdata; + wire [ NUM_PORTS*2-1:0] m_axi_rresp; + wire [ NUM_PORTS*1-1:0] m_axi_rlast; + wire [ NUM_PORTS*1-1:0] m_axi_ruser; // Unused master input + wire [ NUM_PORTS*1-1:0] m_axi_rvalid; + wire [ NUM_PORTS*1-1:0] m_axi_rready; + + // Unused master input signals + assign m_axi_buser = {NUM_PORTS{1'b0}}; + assign m_axi_ruser = {NUM_PORTS{1'b0}}; + + for (genvar i = 0; i < NUM_PORTS; i = i+1) begin : gen_sim_axi_ram + sim_axi_ram #( + .AWIDTH (AWIDTH), + .DWIDTH (MEM_DATA_W), + .IDWIDTH (1), + .BIG_ENDIAN (0), + .STALL_PROB (STALL_PROB) + ) sim_axi_ram_i ( + .s_aclk (mem_clk), + .s_aresetn (~mem_rst), + .s_axi_awid (m_axi_awid[i]), + .s_axi_awaddr (m_axi_awaddr[i*AWIDTH +: AWIDTH]), + .s_axi_awlen (m_axi_awlen[i*8 +: 8]), + .s_axi_awsize (m_axi_awsize[i*3 +: 3]), + .s_axi_awburst (m_axi_awburst[i*2 +: 2]), + .s_axi_awvalid (m_axi_awvalid[i]), + .s_axi_awready (m_axi_awready[i]), + .s_axi_wdata (m_axi_wdata[i*MEM_DATA_W +: MEM_DATA_W]), + .s_axi_wstrb (m_axi_wstrb[i*(MEM_DATA_W/8) +: (MEM_DATA_W/8)]), + .s_axi_wlast (m_axi_wlast[i]), + .s_axi_wvalid (m_axi_wvalid[i]), + .s_axi_wready (m_axi_wready[i]), + .s_axi_bid (m_axi_bid[i]), + .s_axi_bresp (m_axi_bresp[i*2 +: 2]), + .s_axi_bvalid (m_axi_bvalid[i]), + .s_axi_bready (m_axi_bready[i]), + .s_axi_arid (m_axi_arid[i]), + .s_axi_araddr (m_axi_araddr[i*AWIDTH +: AWIDTH]), + .s_axi_arlen (m_axi_arlen[i*8 +: 8]), + .s_axi_arsize (m_axi_arsize[i*3 +: 3]), + .s_axi_arburst (m_axi_arburst[i*2 +: 2]), + .s_axi_arvalid (m_axi_arvalid[i]), + .s_axi_arready (m_axi_arready[i]), + .s_axi_rid (m_axi_rid[i]), + .s_axi_rdata 
(m_axi_rdata[i*MEM_DATA_W +: MEM_DATA_W]), + .s_axi_rresp (m_axi_rresp[i*2 +: 2]), + .s_axi_rlast (m_axi_rlast[i]), + .s_axi_rvalid (m_axi_rvalid[i]), + .s_axi_rready (m_axi_rready[i]) + ); + end + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tready; + + logic [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tready; + + // Map the array of BFMs to a flat vector for the DUT + for (genvar i = 0; i < NUM_PORTS; i++) begin : gen_dut_connections + // Connect BFM master to DUT slave port + assign s_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W] = m_chdr[i].tdata; + assign s_rfnoc_chdr_tlast[i] = m_chdr[i].tlast; + assign s_rfnoc_chdr_tvalid[i] = m_chdr[i].tvalid; + assign m_chdr[i].tready = s_rfnoc_chdr_tready[i]; + + // Connect BFM slave to DUT master port + assign s_chdr[i].tdata = m_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W]; + assign s_chdr[i].tlast = m_rfnoc_chdr_tlast[i]; + assign s_chdr[i].tvalid = m_rfnoc_chdr_tvalid[i]; + assign m_rfnoc_chdr_tready[i] = s_chdr[i].tready; + end + + rfnoc_block_axi_ram_fifo #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NUM_PORTS (NUM_PORTS), + .MTU (MTU), + .MEM_DATA_W (MEM_DATA_W), + .MEM_ADDR_W (MEM_ADDR_W), + .AWIDTH (AWIDTH), + .FIFO_ADDR_BASE ({ FIFO_ADDR_BASE_1, FIFO_ADDR_BASE_0 }), + .FIFO_ADDR_MASK ({NUM_PORTS{FIFO_ADDR_MASK}}), + .BURST_TIMEOUT ({NUM_PORTS{BURST_TIMEOUT}}), + .IN_FIFO_SIZE (IN_FIFO_SIZE), + .OUT_FIFO_SIZE (OUT_FIFO_SIZE), + .BIST (BIST), + .MEM_CLK_RATE (MEM_CLK_RATE) + ) rfnoc_block_axi_ram_fifo_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + 
.s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .rfnoc_core_config (backend.cfg), + .rfnoc_core_status (backend.sts), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .s_rfnoc_ctrl_tdata (m_ctrl.tdata), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast), + .s_rfnoc_ctrl_tvalid (m_ctrl.tvalid), + .s_rfnoc_ctrl_tready (m_ctrl.tready), + .m_rfnoc_ctrl_tdata (s_ctrl.tdata), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast), + .m_rfnoc_ctrl_tvalid (s_ctrl.tvalid), + .m_rfnoc_ctrl_tready (s_ctrl.tready), + .mem_clk (mem_clk), + .axi_rst (mem_rst), + .m_axi_awid (m_axi_awid), + .m_axi_awaddr (m_axi_awaddr), + .m_axi_awlen (m_axi_awlen), + .m_axi_awsize (m_axi_awsize), + .m_axi_awburst (m_axi_awburst), + .m_axi_awlock (m_axi_awlock), + .m_axi_awcache (m_axi_awcache), + .m_axi_awprot (m_axi_awprot), + .m_axi_awqos (m_axi_awqos), + .m_axi_awregion (m_axi_awregion), + .m_axi_awuser (m_axi_awuser), + .m_axi_awvalid (m_axi_awvalid), + .m_axi_awready (m_axi_awready), + .m_axi_wdata (m_axi_wdata), + .m_axi_wstrb (m_axi_wstrb), + .m_axi_wlast (m_axi_wlast), + .m_axi_wuser (m_axi_wuser), + .m_axi_wvalid (m_axi_wvalid), + .m_axi_wready (m_axi_wready), + .m_axi_bid (m_axi_bid), + .m_axi_bresp (m_axi_bresp), + .m_axi_buser (m_axi_buser), + .m_axi_bvalid (m_axi_bvalid), + .m_axi_bready (m_axi_bready), + .m_axi_arid (m_axi_arid), + .m_axi_araddr (m_axi_araddr), + .m_axi_arlen (m_axi_arlen), + .m_axi_arsize (m_axi_arsize), + .m_axi_arburst (m_axi_arburst), + .m_axi_arlock (m_axi_arlock), + .m_axi_arcache (m_axi_arcache), + .m_axi_arprot (m_axi_arprot), + .m_axi_arqos (m_axi_arqos), + .m_axi_arregion (m_axi_arregion), + .m_axi_aruser (m_axi_aruser), + .m_axi_arvalid (m_axi_arvalid), + .m_axi_arready (m_axi_arready), + .m_axi_rid (m_axi_rid), + .m_axi_rdata 
(m_axi_rdata),
    .m_axi_rresp         (m_axi_rresp),
    .m_axi_rlast         (m_axi_rlast),
    .m_axi_ruser         (m_axi_ruser),
    .m_axi_rvalid        (m_axi_rvalid),
    .m_axi_rready        (m_axi_rready)
  );


  //---------------------------------------------------------------------------
  // Helper Tasks
  //---------------------------------------------------------------------------
  //
  // All register helpers address a FIFO's register space as
  // (2**RAM_FIFO_ADDR_W)*port + addr, where "port" selects the FIFO instance
  // and "addr" is the register offset within that instance.

  // Write a 32-bit register of the given port.
  task automatic write_reg(int port, bit [19:0] addr, bit [31:0] value);
    blk_ctrl.reg_write((2**RAM_FIFO_ADDR_W)*port + addr, value);
  endtask : write_reg

  // Write a 64-bit register as two 32-bit writes: low word at addr, then
  // high word at addr+4.
  task automatic write_reg_64(int port, bit [19:0] addr, bit [63:0] value);
    blk_ctrl.reg_write((2**RAM_FIFO_ADDR_W)*port + addr + 0, value[31: 0]);
    blk_ctrl.reg_write((2**RAM_FIFO_ADDR_W)*port + addr + 4, value[63:32]);
  endtask : write_reg_64

  // Read a 32-bit register of the given port. The output is 32 bits wide to
  // match write_reg; it was previously declared 64 bits wide with only the
  // low word ever assigned, which left bits [63:32] undriven (X).
  task automatic read_reg(int port, bit [19:0] addr, output logic [31:0] value);
    blk_ctrl.reg_read((2**RAM_FIFO_ADDR_W)*port + addr, value);
  endtask : read_reg

  // Read a 64-bit register as two 32-bit reads: low word at addr, then high
  // word at addr+4.
  task automatic read_reg_64(int port, bit [19:0] addr, output logic [63:0] value);
    blk_ctrl.reg_read((2**RAM_FIFO_ADDR_W)*port + addr + 0, value[31: 0]);
    blk_ctrl.reg_read((2**RAM_FIFO_ADDR_W)*port + addr + 4, value[63:32]);
  endtask : read_reg_64


  // Generate test data covering num_bytes bytes, rounded up to a whole number
  // of chdr_word_t words, and append it to "queue". The words are random when
  // USE_RANDOM is nonzero; otherwise they are the sequence 0, 1, 2, ...
  task automatic gen_test_data(int num_bytes, output chdr_word_t queue[$]);
    int         num_chdr_words;
    chdr_word_t val64;

    // Calculate the number of chdr_word_t size words
    num_chdr_words = int'($ceil(real'(num_bytes) / ($bits(chdr_word_t) / 8)));

    for (int i = 0; i < num_chdr_words; i++) begin
      if (USE_RANDOM) begin
        val64 = { $urandom(), $urandom() };  // Random data, for more rigorous testing
      end else begin
        val64 = i;    // Sequential data, for easier debugging
      end
      queue.push_back(val64);
    end
  endtask : gen_test_data


  //---------------------------------------------------------------------------
  // Reset
  //---------------------------------------------------------------------------

  task test_reset();
test.start_test("Wait for Reset", 10us);
    mem_clk_gen.reset();
    blk_ctrl.flush_and_reset();
    wait(!mem_rst);
    test.end_test();
  endtask : test_reset


  //---------------------------------------------------------------------------
  // Check NoC ID and Block Info
  //---------------------------------------------------------------------------
  //
  // Compares the NoC ID, the number of input/output data ports, and the MTU
  // reported through blk_ctrl against the DUT's NOC_ID and the testbench's
  // NUM_PORTS and MTU values.
  //
  //---------------------------------------------------------------------------

  task test_block_info();
    test.start_test("Verify Block Info", 2us);
    `ASSERT_ERROR(blk_ctrl.get_noc_id() == rfnoc_block_axi_ram_fifo_i.NOC_ID, "Incorrect NOC_ID Value");
    `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect NUM_DATA_I Value");
    `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect NUM_DATA_O Value");
    `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect MTU Value");
    test.end_test();
  endtask : test_block_info


  //---------------------------------------------------------------------------
  // Check Unused Signals
  //---------------------------------------------------------------------------
  //
  // For every port, checks that the AXI sideband outputs that this testbench
  // expects to be constant (lock, cache, prot, qos, region, user) hold the
  // expected static value. Each signal is a concatenation of per-port fields,
  // so the field for a port is selected with [port*W +: W].
  //
  //---------------------------------------------------------------------------

  task test_unused();
    test.start_test("Check unused/static signals");
    for (int port = 0; port < NUM_PORTS; port++) begin
      // Write address channel
      `ASSERT_ERROR(m_axi_awlock [port*1 +: 1] == 1'b0, "m_axi_awlock value unexpected");
      `ASSERT_ERROR(m_axi_awcache [port*4 +: 4] == 4'hF, "m_axi_awcache value unexpected");
      `ASSERT_ERROR(m_axi_awprot [port*3 +: 3] == 3'h2, "m_axi_awprot value unexpected");
      `ASSERT_ERROR(m_axi_awqos [port*4 +: 4] == 4'h0, "m_axi_awqos value unexpected");
      `ASSERT_ERROR(m_axi_awregion [port*4 +: 4] == 4'h0, "m_axi_awregion value unexpected");
      `ASSERT_ERROR(m_axi_awuser [port*1 +: 1] == 1'b0, "m_axi_awuser value unexpected");
      //
      // Write data channel
      `ASSERT_ERROR(m_axi_wuser [port*1 +: 1] == 1'b0, "m_axi_wuser value unexpected");
      //
      // Read address channel
      `ASSERT_ERROR(m_axi_arlock [port*1 +: 1] == 1'b0, "m_axi_arlock value unexpected");
      `ASSERT_ERROR(m_axi_arcache [port*4 +: 4] == 4'hF, "m_axi_arcache value unexpected");
      `ASSERT_ERROR(m_axi_arprot [port*3 +: 3] == 3'h2, "m_axi_arprot value unexpected");
      `ASSERT_ERROR(m_axi_arqos [port*4 +: 4] == 4'h0, "m_axi_arqos value unexpected");
      `ASSERT_ERROR(m_axi_arregion [port*4 +: 4] == 4'h0, "m_axi_arregion value unexpected");
      `ASSERT_ERROR(m_axi_aruser [port*1 +: 1] == 1'b0, "m_axi_aruser value unexpected");
    end
    test.end_test();
  endtask : test_unused


  //---------------------------------------------------------------------------
  // Test Registers
  //---------------------------------------------------------------------------
  //
  // For every port, checks the value each register reads back before it is
  // written. For the writable registers it then writes a new value, confirms
  // the read-back, and restores the original value so that later tests see
  // the same register state this test started with.
  //
  //---------------------------------------------------------------------------

  task test_registers();
    logic [63:0] val64, expected64, temp64;
    logic [31:0] val32, expected32, temp32;

    test.start_test("Test registers", 50us);

    for (int port = 0; port < NUM_PORTS; port++) begin
      //
      // REG_FIFO_INFO
      //
      expected32 = 0;
      expected32[REG_FIFO_MAGIC_POS +: REG_FIFO_MAGIC_W] = 16'hF1F0;
      expected32[REG_FIFO_BIST_PRSNT_POS] = (BIST != 0);
      read_reg(port, REG_FIFO_INFO, val32);
      `ASSERT_ERROR(val32 == expected32, "Initial value for REG_FIFO_INFO is not correct");

      //
      // REG_FIFO_READ_SUPPRESS
      //
      expected32 = 0;
      expected32[REG_FIFO_IN_FIFO_SIZE_POS+:REG_FIFO_IN_FIFO_SIZE_W] = IN_FIFO_SIZE;
      expected32[REG_FIFO_SUPPRESS_THRESH_POS+:REG_FIFO_SUPPRESS_THRESH_W] = 0;
      read_reg(port, REG_FIFO_READ_SUPPRESS, val32);
      `ASSERT_ERROR(val32 == expected32, "Initial value for REG_FIFO_READ_SUPPRESS is not correct");

      // Invert only the threshold field; the other fields keep their
      // expected values and must read back unchanged.
      temp32 = expected32;
      expected32[REG_FIFO_SUPPRESS_THRESH_POS+:REG_FIFO_SUPPRESS_THRESH_W] =
        ~val32[REG_FIFO_SUPPRESS_THRESH_POS+:REG_FIFO_SUPPRESS_THRESH_W];
      write_reg(port, REG_FIFO_READ_SUPPRESS, expected32);
      read_reg(port, REG_FIFO_READ_SUPPRESS, val32);
      `ASSERT_ERROR(val32 == expected32, "REG_FIFO_READ_SUPPRESS did not update");

      // Put the original value back
      expected32 = temp32;
      write_reg(port, REG_FIFO_READ_SUPPRESS, expected32);
      read_reg(port, REG_FIFO_READ_SUPPRESS, val32);
      `ASSERT_ERROR(val32 == expected32, "REG_FIFO_READ_SUPPRESS did not reset");

      //
      // REG_FIFO_MEM_SIZE
      //
      expected32 = 0;
      expected32[REG_FIFO_DATA_SIZE_POS +: REG_FIFO_DATA_SIZE_W] = MEM_DATA_W;
      expected32[REG_FIFO_ADDR_SIZE_POS +: REG_FIFO_ADDR_SIZE_W] = MEM_ADDR_W;
      read_reg(port, REG_FIFO_MEM_SIZE, val32);
      `ASSERT_ERROR(val32 == expected32, "Incorrect REG_FIFO_MEM_SIZE value!");

      //
      // REG_FIFO_TIMEOUT
      //
      expected32 = BURST_TIMEOUT;
      read_reg(port, REG_FIFO_TIMEOUT, val32);
      `ASSERT_ERROR(val32 == expected32, "Initial value for REG_FIFO_TIMEOUT is not correct");

      // Write all ones across the REG_TIMEOUT_W-bit field
      write_reg(port, REG_FIFO_TIMEOUT, {REG_TIMEOUT_W{1'b1}});
      read_reg(port, REG_FIFO_TIMEOUT, val32);
      `ASSERT_ERROR(val32 == {REG_TIMEOUT_W{1'b1}}, "REG_FIFO_TIMEOUT did not update");

      // Put the original value back
      write_reg(port, REG_FIFO_TIMEOUT, expected32);
      read_reg(port, REG_FIFO_TIMEOUT, val32);
      `ASSERT_ERROR(val32 == expected32, "REG_FIFO_TIMEOUT did not reset");

      //
      // REG_FIFO_FULLNESS
      //
      read_reg_64(port, REG_FIFO_FULLNESS_LO, val64);
      `ASSERT_ERROR(val64 == 0, "Incorrect REG_FIFO_FULLNESS value!");

      //
      // REG_FIFO_ADDR_BASE
      //
      // The expected initial base places port N at N * 2**FIFO_ADDR_W.
      expected64 = port * 2**FIFO_ADDR_W;
      read_reg_64(port, REG_FIFO_ADDR_BASE_LO, val64);
      `ASSERT_ERROR(val64 == expected64, "Initial value for REG_FIFO_ADDR_BASE is not correct");

      write_reg_64(port, REG_FIFO_ADDR_BASE_LO, {MEM_ADDR_W{1'b1}});
      read_reg_64(port, REG_FIFO_ADDR_BASE_LO, val64);
      `ASSERT_ERROR(val64 == {MEM_ADDR_W{1'b1}}, "REG_FIFO_ADDR_BASE did not update");

      // Put the original value back
      write_reg_64(port, REG_FIFO_ADDR_BASE_LO, expected64);
      read_reg_64(port, REG_FIFO_ADDR_BASE_LO, val64);
      `ASSERT_ERROR(val64 == expected64, "REG_FIFO_ADDR_BASE did not reset");

      //
      // REG_FIFO_ADDR_MASK
      //
      expected64 = {FIFO_ADDR_W{1'b1}};
      read_reg_64(port, REG_FIFO_ADDR_MASK_LO, val64);
      `ASSERT_ERROR(val64 == expected64, "Initial value for REG_FIFO_ADDR_MASK_LO is not correct");

      // Set to the max value
      write_reg_64(port, REG_FIFO_ADDR_MASK_LO, {MEM_ADDR_W{1'b1}});
      read_reg_64(port, REG_FIFO_ADDR_MASK_LO, val64);
      `ASSERT_ERROR(val64 == {MEM_ADDR_W{1'b1}}, "REG_FIFO_ADDR_MASK_LO did not update");

      // Set to the min value
      write_reg_64(port, REG_FIFO_ADDR_MASK_LO, 0);
      read_reg_64(port, REG_FIFO_ADDR_MASK_LO, val64);
      // Coerce to the minimum allowed value
      // NOTE(review): the reference value is always read from instance [0]
      // rather than from the port under test -- presumably because every
      // instance shares the same FIFO_ADDR_MASK_MIN; confirm.
      temp64 = rfnoc_block_axi_ram_fifo_i.gen_ram_fifos[0].axi_ram_fifo_i.FIFO_ADDR_MASK_MIN;
      `ASSERT_ERROR(val64 == temp64, "REG_FIFO_ADDR_MASK_LO did not update");

      // Put the original value back
      write_reg_64(port, REG_FIFO_ADDR_MASK_LO, expected64);
      read_reg_64(port, REG_FIFO_ADDR_MASK_LO, val64);
      `ASSERT_ERROR(val64 == expected64, "REG_FIFO_ADDR_MASK_LO did not reset");

      //
      // REG_FIFO_PACKET_CNT
      //
      read_reg(port, REG_FIFO_PACKET_CNT, val32);
      `ASSERT_ERROR(val32 == 0, "Incorrect REG_FIFO_PACKET_CNT value!");

      // The BIST registers are only checked when the BIST parameter is set
      if (BIST) begin
        read_reg(port, REG_BIST_CTRL, val32);
        `ASSERT_ERROR(val32 == 0, "Initial value for REG_BIST_CTRL is not correct");
        read_reg(port, REG_BIST_CLK_RATE, val32);
        `ASSERT_ERROR(val32 == MEM_CLK_RATE, "Initial value for REG_BIST_CLK_RATE is not correct");
        read_reg_64(port, REG_BIST_NUM_BYTES_LO, val64);
        `ASSERT_ERROR(val64 == 0, "Initial value for REG_BIST_NUM_BYTES is not correct");
        read_reg_64(port, REG_BIST_TX_BYTE_COUNT_LO, val64);
        `ASSERT_ERROR(val64 == 0, "Initial value for REG_BIST_TX_BYTE_COUNT is not correct");
        read_reg_64(port, REG_BIST_RX_BYTE_COUNT_LO, val64);
        `ASSERT_ERROR(val64 == 0, "Initial value for REG_BIST_RX_BYTE_COUNT is not correct");
        read_reg_64(port, REG_BIST_ERROR_COUNT_LO, val64);
        `ASSERT_ERROR(val64 == 0, "Initial value for REG_BIST_ERROR_COUNT is not correct");
        read_reg_64(port, REG_BIST_CYCLE_COUNT_LO, val64);
        `ASSERT_ERROR(val64 == 0, "Initial value for REG_BIST_CYCLE_COUNT is not correct");
      end
    end

    test.end_test();
  endtask : test_registers


  //---------------------------------------------------------------------------
  // Basic Test
  //---------------------------------------------------------------------------
  //
  // Push a few packets through each FIFO.
//
  //---------------------------------------------------------------------------

  // Sends three packets' worth of data through every port, checks that the
  // FIFO fullness register becomes non-zero, verifies each received packet
  // word-for-word, and checks that the packet counter advanced.
  task test_basic();
    logic [31:0] val32;

    test.start_test("Basic test", NUM_PORTS*20us);

    for (int port = 0; port < NUM_PORTS; port++) begin
      chdr_word_t test_data[$];
      logic [63:0] val64;
      timeout_t timeout;

      // Generate the test data to send
      gen_test_data(PKT_SIZE_BYTES*3, test_data);

      // Queue up the packets to send
      blk_ctrl.send_packets(port, test_data);

      // Make sure fullness increases
      test.start_timeout(timeout, 4us,
        $sformatf("Waiting for fullness to increase on port %0d", port));
      forever begin
        read_reg_64(port, REG_FIFO_FULLNESS_LO, val64);
        if (val64 != 0) break;
      end
      test.end_timeout(timeout);

      // Verify the data, one packet at a time. "count" indexes test_data and
      // is advanced by the inner loop, one step per received word.
      for (int count = 0; count < test_data.size(); ) begin
        chdr_word_t recv_data[$];
        int data_bytes;
        blk_ctrl.recv(port, recv_data, data_bytes);

        `ASSERT_ERROR(
          data_bytes == PKT_SIZE_BYTES,
          "Length didn't match expected value"
        );

        for (int i = 0; i < recv_data.size(); i++, count++) begin
          if (recv_data[i] != test_data[count]) begin
            $display("Expected %X, received %X on port %0d", test_data[count], recv_data[i], port);
          end
          `ASSERT_ERROR(
            recv_data[i] == test_data[count],
            "Received data doesn't match expected value"
          );
        end
      end

      // Make sure the packet count updated
      read_reg(port, REG_FIFO_PACKET_CNT, val32);
      `ASSERT_ERROR(val32 > 0, "REG_FIFO_PACKET_CNT didn't update");
    end

    test.end_test();
  endtask : test_basic


  //---------------------------------------------------------------------------
  // Single Byte Test
  //---------------------------------------------------------------------------
  //
  // Sends a one-byte packet through each port and checks the returned length
  // and the value of that byte.
  //
  //---------------------------------------------------------------------------

  task test_single_byte();
    test.start_test("Single byte test", 20us);

    for (int port = 0; port < NUM_PORTS; port++) begin
      chdr_word_t test_data[$];
      chdr_word_t recv_data[$];
      int data_bytes;

      gen_test_data(1, test_data);

      blk_ctrl.send(port, test_data, 1);
      blk_ctrl.recv(port, recv_data, data_bytes);

      // One byte of payload is expected to come back as exactly one CHDR_W
      // wide word, i.e. CHDR_W/$bits(chdr_word_t) queue entries.
      `ASSERT_ERROR(
        data_bytes == 1 && recv_data.size() == CHDR_W/$bits(chdr_word_t),
        "Length didn't match expected value"
      );
      // Only the least-significant byte carries payload
      `ASSERT_ERROR(
        recv_data[0][7:0] == test_data[0][7:0],
        "Received data doesn't match expected value"
      );
    end

    test.end_test();
  endtask : test_single_byte


  //---------------------------------------------------------------------------
  // Test Overflow
  //---------------------------------------------------------------------------
  //
  // Fill the FIFO on all ports to make sure it fills correctly and flow
  // control works correctly at the limits. Skipped when OVERFLOW is 0.
  //
  //---------------------------------------------------------------------------

  task test_overflow();
    chdr_word_t test_data[NUM_PORTS][$];
    int num_bytes, num_words;
    bit [NUM_PORTS-1:0] full_bits;
    logic [63:0] val64;
    timeout_t timeout;
    realtime start_time;

    if (!OVERFLOW) return;

    // Size the transfer at 1.5x the sum of the input/output FIFO sizes (in
    // bytes) and 2**MEM_ADDR_W, so that the input is certain to stall.
    num_bytes = (MEM_DATA_W/8) * (2**IN_FIFO_SIZE + 2**OUT_FIFO_SIZE) + 2**MEM_ADDR_W;
    num_bytes = num_bytes * 3 / 2;
    num_words = num_bytes / (CHDR_W/8);

    test.start_test("Overflow test", 10 * num_words * CHDR_CLK_PER);

    // Stall the output of each FIFO, allow unrestricted input
    for (int port = 0; port < NUM_PORTS; port++) begin
      blk_ctrl.set_master_stall_prob(port, 0);
      blk_ctrl.set_slave_stall_prob(port, 100);
    end

    // Input more packets into each FIFO than they can fit
    for (int port = 0; port < NUM_PORTS; port++) begin
      gen_test_data(num_bytes, test_data[port]);
      blk_ctrl.send_packets(port, test_data[port]);
    end

    // Wait for all inputs to stall
    test.start_timeout(timeout, (4 * num_words + 1000) * CHDR_CLK_PER,
      $sformatf("Waiting for input to stall"));
    full_bits = 0;
    // Start measuring from now. Without this, start_time would still hold its
    // default of 0 if every port already looks stalled on the first sample,
    // and the 1000-cycle requirement below would be satisfied immediately.
    start_time = $realtime;
    forever begin
      for (int port = 0; port < NUM_PORTS; port++) begin
        full_bits[port] = ~s_rfnoc_chdr_tready[port];
        // Any port that is still accepting data restarts the stall timer
        if (!full_bits[port]) start_time = $realtime;
      end

      // Break as soon as all FIFOs have been stalled for 1000 clock cycles
      if (full_bits == {NUM_PORTS{1'b1}} && $realtime-start_time > 1000 * CHDR_CLK_PER) break;
      #(CHDR_CLK_PER*100);
    end
    test.end_timeout(timeout);

    // Make sure all the FIFOs filled up
    for (int port = 0; port < NUM_PORTS; port++) begin
      read_reg_64(port, REG_FIFO_FULLNESS_LO, val64);
      // FIFO is full once it comes within 256 words of being full
      `ASSERT_ERROR(val64 >= (2**FIFO_ADDR_W / (MEM_DATA_W/8)) - 256, "FIFO not reading full");
    end

    // Restore the input/output rates
    for (int port = 0; port < NUM_PORTS; port++) begin
      blk_ctrl.set_master_stall_prob(port, STALL_PROB);
      blk_ctrl.set_slave_stall_prob(port, STALL_PROB);
    end

    // Read out and verify the data
    for (int port = 0; port < NUM_PORTS; port++) begin
      for (int count = 0; count < test_data[port].size(); ) begin
        chdr_word_t recv_data[$];
        int data_bytes;
        int expected_length;
        blk_ctrl.recv(port, recv_data, data_bytes);

        if (count*($bits(chdr_word_t)/8) + PKT_SIZE_BYTES <= num_bytes) begin
          // Should be a full packet
          expected_length = PKT_SIZE_BYTES;
        end else begin
          // Should be a partial packet
          expected_length = num_bytes % PKT_SIZE_BYTES;
        end

        // Check the length
        `ASSERT_ERROR(
          data_bytes == expected_length,
          "Length didn't match expected value"
        );

        for (int i = 0; i < recv_data.size(); i++, count++) begin
          `ASSERT_ERROR(
            recv_data[i] == test_data[port][count],
            "Received data doesn't match expected value"
          );
        end
      end
    end

    test.end_test();
  endtask : test_overflow


  //---------------------------------------------------------------------------
  // Test Read Suppression
  //---------------------------------------------------------------------------
  //
  // Sets the read-suppression threshold to its maximum on port 0, pushes data
  // in until the input handshake stops, and checks that nothing was output.
  // Then clears the threshold, verifies that all data comes out intact, and
  // restores the original register value.
  //
  //---------------------------------------------------------------------------

  task test_read_suppression();
    chdr_word_t test_data[$];
    logic [31:0] val32, save32;
    int port;

    test.start_test("Read suppression test", 100us);

    port = 0; // Only test one port

    // Turn on read suppression with the max threshold to cause it to
    // suppress everything.
    read_reg(port, REG_FIFO_READ_SUPPRESS, save32);
    val32 = save32;
    val32[REG_FIFO_SUPPRESS_THRESH_POS +: REG_FIFO_SUPPRESS_THRESH_W] = {REG_FIFO_SUPPRESS_THRESH_W{1'b1}};
    write_reg(port, REG_FIFO_READ_SUPPRESS, val32);

    // Generate the test data to send (send 8 RAM bursts)
    gen_test_data(MEM_DATA_W/8 * 256 * 8, test_data);

    // Start sending packets then wait for the input to stall, either because
    // we've filled the FIFO or we've input everything.
    //
    // The handshake signals hold one bit per port, so only the bits of the
    // port under test are examined. Using the whole vectors would let the
    // state of other ports decide when these waits complete.
    blk_ctrl.set_master_stall_prob(port, 0);
    blk_ctrl.send_packets(port, test_data);
    wait (s_rfnoc_chdr_tvalid[port] && s_rfnoc_chdr_tready[port]);
    wait (!s_rfnoc_chdr_tvalid[port] || !s_rfnoc_chdr_tready[port]);

    // Make sure nothing made it through
    `ASSERT_ERROR(blk_ctrl.num_received(port) == 0, "Read suppression failed");

    // Turn down the threshold
    val32[REG_FIFO_SUPPRESS_THRESH_POS +: REG_FIFO_SUPPRESS_THRESH_W] = {REG_FIFO_SUPPRESS_THRESH_W{1'b0}};
    write_reg(port, REG_FIFO_READ_SUPPRESS, val32);

    blk_ctrl.set_master_stall_prob(port, STALL_PROB);

    // Verify the data, one packet at a time
    for (int count = 0; count < test_data.size(); ) begin
      chdr_word_t recv_data[$];
      int data_bytes;
      blk_ctrl.recv(port, recv_data, data_bytes);

      for (int i = 0; i < recv_data.size(); i++, count++) begin
        if (recv_data[i] != test_data[count]) begin
          $display("Expected %X, received %X on port %0d", test_data[count], recv_data[i], port);
        end
        `ASSERT_ERROR(
          recv_data[i] == test_data[count],
          "Received data doesn't match expected value"
        );
      end
    end

    // Restore suppression settings
    write_reg(port, REG_FIFO_READ_SUPPRESS, save32);

    test.end_test();
  endtask : test_read_suppression


  //---------------------------------------------------------------------------
  // Random Tests
  //---------------------------------------------------------------------------
  //
  // Perform a series of random tests with different read/write probabilities to
  // test unexpected
// conditions.
  //
  //---------------------------------------------------------------------------

  // One randomly sized packet: the generated payload words and the payload
  // length in bytes. Used to carry expected data from generator to checker.
  class RandTrans;
    chdr_word_t data[$];
    int num_bytes;
  endclass;

  // Sends NUM_PACKETS packets of random length (1 to PKT_SIZE_BYTES bytes) on
  // port 0 and checks each one on reception, while a parallel process steps
  // the BFM stall probabilities through four phases.
  task test_random();
    localparam NUM_PACKETS = 256;

    mailbox #(RandTrans) data_queue;
    int port;

    test.start_test("Random test", NUM_PACKETS * 2us);

    data_queue = new();
    port = 0; // Just check one port for this test

    // Queue up a bunch of random packets
    begin : data_gen
      RandTrans trans;
      $display("Generating %0d random packets...", NUM_PACKETS);

      for (int packet_count = 0; packet_count < NUM_PACKETS; packet_count++) begin
        trans = new();
        trans.num_bytes = $urandom_range(1, PKT_SIZE_BYTES);
        gen_test_data(trans.num_bytes, trans.data);
        blk_ctrl.send(port, trans.data, trans.num_bytes);
        // Remember what was sent so data_check can compare against it
        data_queue.put(trans);
      end
    end

    // Receive and check all the packets
    fork
      begin : stall_update
        // Split the packets into four groups and use different stall
        // behavior for each.
        //
        //   1. Start filling up the FIFO
        //   2. Let it run for a while
        //   3. Start emptying the FIFO
        //   4. Let it run until all the data gets through
        //
        for (int i = 0; i < 4; i++) begin
          case (i)
            0 : begin
              $display("Test fast writer, slow reader");
              blk_ctrl.set_master_stall_prob(port, 10);
              blk_ctrl.set_slave_stall_prob(port, 80);
            end
            1 : begin
              $display("Test matched reader/writer");
              blk_ctrl.set_master_stall_prob(port, STALL_PROB);
              blk_ctrl.set_slave_stall_prob(port, STALL_PROB);
            end
            2 : begin
              $display("Test slow writer, fast reader");
              blk_ctrl.set_master_stall_prob(port, 90);
              blk_ctrl.set_slave_stall_prob(port, 10);
            end
            3 : begin
              $display("Test matched reader/writer");
              blk_ctrl.set_master_stall_prob(port, STALL_PROB);
              blk_ctrl.set_slave_stall_prob(port, STALL_PROB);
            end
          endcase

          // Wait for a quarter of the packets to be accepted by the RAM FIFO
          blk_ctrl.wait_complete(port, NUM_PACKETS/4);
        end
      end
      begin : data_check
        RandTrans trans;
        chdr_word_t recv_data[$];
        int num_bytes;
        int num_words;


        for (int packet_count = 0; packet_count < NUM_PACKETS; packet_count++) begin
          //$display("Checking packet %0d/%0d...", packet_count, NUM_PACKETS);

          blk_ctrl.recv(port, recv_data, num_bytes);
          data_queue.get(trans);

          // Check the length
          `ASSERT_ERROR(
            num_bytes == trans.num_bytes,
            "Length didn't match expected value"
          );

          // If the generated data was an odd number of chdr_word_t words, we
          // will get back an extra 0 word at the end. Calculate the correct
          // number of words so that we ignore any extra at the end.
          num_words = int'($ceil(real'(num_bytes)/($bits(chdr_word_t)/8)));
          for (int i = 0; i < num_words; i++) begin
            `ASSERT_ERROR(
              recv_data[i] == trans.data[i],
              "Received data doesn't match expected value"
            );
          end
        end
      end
    join

    test.end_test();
  endtask : test_random


  //---------------------------------------------------------------------------
  // Test Clearing FIFO Block
  //---------------------------------------------------------------------------
  //
  // Placeholder: currently only emits a warning that the test is missing.
  //
  //---------------------------------------------------------------------------

  task test_clear();
    test.start_test("FIFO clear test", 100us);

    // TODO:
    $warning("Need to write a test flushing and resetting the block!");

    test.end_test();
  endtask : test_clear


  //---------------------------------------------------------------------------
  // Test BIST
  //---------------------------------------------------------------------------
  //
  // Runs a 2048-byte BIST on port 0 and checks the byte, error, and cycle
  // counters once the RUNNING bit clears. Skipped when BIST is 0.
  //
  //---------------------------------------------------------------------------

  task test_bist();
    logic [31:0] val32;
    logic [63:0] val64;
    int port;
    int num_bytes;

    if (!BIST) return;

    test.start_test("BIST test", 100us);

    port = 0; // Test the first port
    num_bytes = 2048;

    // Start a test: clear first, set the length, then start
    write_reg(port, REG_BIST_CTRL, 1 << REG_BIST_CLEAR_POS);
    write_reg(port, REG_BIST_NUM_BYTES_LO, num_bytes);
    write_reg(port, REG_BIST_CTRL, 1 << REG_BIST_START_POS);

    // Make sure running bit gets set
    read_reg(port, REG_BIST_CTRL, val32);
    `ASSERT_ERROR(val32[REG_BIST_RUNNING_POS] == 1'b1, "RUNNING bit not set");

    // Wait for the test to complete
    do begin
      read_reg(port, REG_BIST_CTRL, val32);
    end while(val32[REG_BIST_RUNNING_POS]);

    // Check the results
    read_reg_64(port, REG_BIST_TX_BYTE_COUNT_LO, val64);
    `ASSERT_ERROR(val64 == num_bytes, "TX_BYTE_COUNT is not correct");
    read_reg_64(port, REG_BIST_RX_BYTE_COUNT_LO, val64);
    `ASSERT_ERROR(val64 == num_bytes, "RX_BYTE_COUNT is not correct");
    read_reg_64(port, REG_BIST_ERROR_COUNT_LO, val64);
    `ASSERT_ERROR(val64 == 0, "ERROR_COUNT is not zero");
    read_reg_64(port, REG_BIST_CYCLE_COUNT_LO, val64);
    `ASSERT_ERROR(val64 > 0, "CYCLE_COUNT did not update");

    // TODO:
    $warning("BIST Continuous mode is NOT being tested");
    $warning("BIST error insertion is NOT being tested (errors might be ignored)");

    test.end_test();
  endtask : test_bist


  //---------------------------------------------------------------------------
  // BIST Throughput Test
  //---------------------------------------------------------------------------
  //
  // This test sanity-checks the values returned by the BIST. If run with the
  // other BIST test, it also tests clearing the BIST counters.
  //
  // The simulated memory's stall probability is set to 0 for the duration of
  // the measurement and restored to STALL_PROB afterwards.
  //
  //---------------------------------------------------------------------------

  task test_bist_throughput();
    localparam port = 0; // Test the first port
    logic [31:0] val32;
    logic [63:0] val64;
    int num_bytes;
    longint rx_byte_count;
    longint cycle_count;
    real throughput;
    real efficiency;

    if (!BIST) return;

    test.start_test("BIST throughput test", 100us);

    num_bytes = 64*1024;

    // Reset the memory probability
    gen_sim_axi_ram[port].sim_axi_ram_i.set_stall_prob(0);

    // Start a test
    write_reg(port, REG_BIST_CTRL, 1 << REG_BIST_CLEAR_POS);
    write_reg(port, REG_BIST_NUM_BYTES_LO, num_bytes);
    write_reg(port, REG_BIST_CTRL, 1 << REG_BIST_START_POS);

    // Make sure running bit gets set
    read_reg(port, REG_BIST_CTRL, val32);
    `ASSERT_ERROR(val32[REG_BIST_RUNNING_POS] == 1'b1, "RUNNING bit not set");

    // Wait for the test to complete
    do begin
      read_reg(port, REG_BIST_CTRL, val32);
    end while(val32[REG_BIST_RUNNING_POS]);

    // Check the results
    read_reg_64(port, REG_BIST_TX_BYTE_COUNT_LO, val64);
    `ASSERT_ERROR(val64 == num_bytes, "TX_BYTE_COUNT is not correct");
    read_reg_64(port, REG_BIST_RX_BYTE_COUNT_LO, rx_byte_count);
    `ASSERT_ERROR(rx_byte_count == num_bytes, "RX_BYTE_COUNT is not correct");
    read_reg_64(port, REG_BIST_ERROR_COUNT_LO, val64);
    `ASSERT_ERROR(val64 == 0, "ERROR_COUNT is not zero");
    read_reg_64(port, REG_BIST_CYCLE_COUNT_LO, cycle_count);
    `ASSERT_ERROR(cycle_count > 0, "CYCLE_COUNT did not update");

    // Throughput = num_bytes / time = num_bytes / (num_cycles * period)
    throughput = real'(rx_byte_count) / (real'(cycle_count) / real'(MEM_CLK_RATE));

    // Efficiency is the actual throughput divided by the theoretical max. We
    // use 0.5x in the calculation because we assume that the memory is a
    // half-duplex read/write memory running at MEM_CLK_RATE, but we're
    // measuring the full-duplex throughput.
    efficiency = throughput / (0.5 * real'(MEM_CLK_RATE) * (MEM_DATA_W/8));

    $display("BIST Throughput: %0.1f MB/s", throughput / 1.0e6);
    $display("BIST Efficiency: %0.1f %%", efficiency * 100.0 );

    `ASSERT_ERROR(efficiency > 0.95, "BIST efficiency was lower than expected");

    // Restore the memory stall probability
    gen_sim_axi_ram[port].sim_axi_ram_i.set_stall_prob(STALL_PROB);

    test.end_test();
  endtask;


  //---------------------------------------------------------------------------
  // Main Test Process
  //---------------------------------------------------------------------------
  //
  // Names this testbench instance after its parameters, starts the clocks and
  // BFMs, runs every test task in order, then stops the clocks.
  //
  //---------------------------------------------------------------------------

  initial begin : tb_main
    const int port = 0;
    string tb_name;

    // Generate a string for the name of this instance of the testbench
    tb_name = $sformatf(
      "rfnoc_block_axi_ram_fifo_tb\nCHDR_W = %0D, NUM_PORTS = %0D, MEM_DATA_W = %0D, MEM_ADDR_W = %0D, FIFO_ADDR_W = %0D, IN_FIFO_SIZE = %0D, OUT_FIFO_SIZE = %0D, OVERFLOW = %0D, BIST = %0D",
      CHDR_W, NUM_PORTS, MEM_DATA_W, MEM_ADDR_W, FIFO_ADDR_W, IN_FIFO_SIZE, OUT_FIFO_SIZE, OVERFLOW, BIST
    );
    test.start_tb(tb_name);

    // Don't start the clocks until after start_tb() returns. This ensures that
    // the clocks aren't toggling while other instances of this testbench are
    // running, which speeds up simulation time.
    rfnoc_chdr_clk_gen.start();
    rfnoc_ctrl_clk_gen.start();
    mem_clk_gen.start();

    // Start the BFMs running
    blk_ctrl.run();

    //
    // Run test procedures
    //
    test_reset();
    test_block_info();
    test_unused();
    test_registers();
    test_basic();
    test_single_byte();
    test_overflow();
    test_read_suppression();
    test_random();
    test_clear();
    test_bist();
    test_bist_throughput();

    // End the TB, but don't $finish, since we don't want to kill other
    // instances of this testbench that may be running.
    test.end_tb(0);

    // Kill the clocks to end this instance of the testbench
    rfnoc_chdr_clk_gen.kill();
    rfnoc_ctrl_clk_gen.kill();
    mem_clk_gen.kill();
  end
endmodule
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/sim_axi_ram.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/sim_axi_ram.sv
new file mode 100644
index 000000000..ee7ff5df8
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_axi_ram_fifo/sim_axi_ram.sv
@@ -0,0 +1,637 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: sim_axi_ram
//
// Description:
//
//   Simulation model for a basic AXI4 memory mapped memory. A few notes on its
//   behavior:
//
//   - This model does not reorder requests (regardless of WID/RID). All
//     requests are evaluated strictly in order.
//   - The only supported response is OKAY
//   - This model supports misaligned memory accesses, which cause a
//     simulation warning.
//   - A reset does not clear the memory contents
//   - The memory itself is implemented using an associative array (sparse
//     matrix) so that large memories can be supported.
//   - This model is half duplex, meaning read and write data transfers won't
//     happen at the same time. A new data transfer won't begin until the
//     previous one has completed.
+// + +module sim_axi_ram #( + parameter AWIDTH = 32, + parameter DWIDTH = 64, + parameter IDWIDTH = 2, + parameter BIG_ENDIAN = 0, + parameter STALL_PROB = 25 +) ( + input logic s_aclk, + input logic s_aresetn, + + // Write Address Channel + input logic [IDWIDTH-1:0] s_axi_awid, + input logic [ AWIDTH-1:0] s_axi_awaddr, + input logic [ 7:0] s_axi_awlen, + input logic [ 2:0] s_axi_awsize, + input logic [ 1:0] s_axi_awburst, + input logic s_axi_awvalid, + output logic s_axi_awready, + + // Write Data Channel + input logic [ DWIDTH-1:0] s_axi_wdata, + input logic [DWIDTH/8-1:0] s_axi_wstrb, + input logic s_axi_wlast, + input logic s_axi_wvalid, + output logic s_axi_wready, + + // Write Response Channel + output logic [IDWIDTH-1:0] s_axi_bid, + output logic [ 1:0] s_axi_bresp, + output logic s_axi_bvalid, + input logic s_axi_bready, + + // Read Address Channel + input logic [IDWIDTH-1:0] s_axi_arid, + input logic [ AWIDTH-1:0] s_axi_araddr, + input logic [ 7:0] s_axi_arlen, + input logic [ 2:0] s_axi_arsize, + input logic [ 1:0] s_axi_arburst, + input logic s_axi_arvalid, + output logic s_axi_arready, + + // Read Data Channel + output logic [ 0:0] s_axi_rid, + output logic [DWIDTH-1:0] s_axi_rdata, + output logic [ 1:0] s_axi_rresp, + output logic s_axi_rlast, + output logic s_axi_rvalid, + input logic s_axi_rready +); + + localparam DEBUG = 0; + + //--------------------------------------------------------------------------- + // Data Types + //--------------------------------------------------------------------------- + + typedef enum logic [1:0] { FIXED, INCR, WRAP } burst_t; + typedef enum logic [1:0] { OKAY, EXOKAY, SLVERR, DECERR } resp_t; + + typedef struct packed { + longint count; // Number of requests to wait for before executing + logic [IDWIDTH-1:0] id; + logic [AWIDTH-1:0] addr; + logic [8:0] len; // Add an extra bit, since actual true length is +1 + logic [7:0] size; // Add extra bits to store size in bytes, instead of clog2(size) + burst_t burst; + } 
req_t; + + // Make the address type an extra bit wide so that we can detect + // out-of-bounds accesses easily. + typedef bit [AWIDTH:0] addr_t; + + // Data word type + typedef logic [DWIDTH-1:0] data_t; + + // Mask to indicate which bits should be written. + typedef bit [DWIDTH/8-1:0] mask_t; + + + //--------------------------------------------------------------------------- + // Data Structures + //--------------------------------------------------------------------------- + + byte memory [addr_t]; // Byte addressable memory + mailbox #(req_t) read_req = new(); // Read request queue + mailbox #(req_t) write_req = new(); // Write request queue + mailbox #(req_t) write_resp = new(); // Write response queue + + longint req_count; // Number of requests received + longint compl_count; // Number of requests completed + + + //--------------------------------------------------------------------------- + // External Configuration Interface + //--------------------------------------------------------------------------- + + int waddr_stall_prob = STALL_PROB; + int wdata_stall_prob = STALL_PROB; + int wresp_stall_prob = STALL_PROB; + int raddr_stall_prob = STALL_PROB; + int rdata_stall_prob = STALL_PROB; + + // Set ALL stall probabilities to the same value + function void set_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + waddr_stall_prob = probability; + wdata_stall_prob = probability; + wresp_stall_prob = probability; + raddr_stall_prob = probability; + rdata_stall_prob = probability; + endfunction : set_stall_prob + + // Set WRITE stall probabilities to the same value + function void set_write_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + waddr_stall_prob = probability; + wdata_stall_prob = probability; + wresp_stall_prob = probability; + endfunction : set_write_stall_prob + + // 
Set READ stall probabilities to the same value + function void set_read_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + raddr_stall_prob = probability; + rdata_stall_prob = probability; + endfunction : set_read_stall_prob + + // Set Write Address Channel stall probability + function void set_waddr_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + waddr_stall_prob = probability; + endfunction : set_waddr_stall_prob + + // Set Write Data Channel stall probability + function void set_wdata_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + wdata_stall_prob = probability; + endfunction : set_wdata_stall_prob + + // Set Write Response Channel stall probability + function void set_wresp_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + wresp_stall_prob = probability; + endfunction : set_wresp_stall_prob + + // Set Read Address Channel stall probability + function void set_raddr_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + raddr_stall_prob = probability; + endfunction : set_raddr_stall_prob + + // Set Read Data Channel stall probability + function void set_rdata_stall_prob(int probability); + assert(probability >= 0 && probability <= 100) else begin + $error("Probability must be from 0 to 100"); + end + rdata_stall_prob = probability; + endfunction : set_rdata_stall_prob + + // Get Write Address Channel stall probability + function int get_waddr_stall_prob(); + return waddr_stall_prob; + endfunction : get_waddr_stall_prob + + // Get Write Data Channel stall probability + function int 
get_wdata_stall_prob(); + return wdata_stall_prob; + endfunction : get_wdata_stall_prob + + // Get Write Response Channel stall probability + function int get_wresp_stall_prob(); + return wresp_stall_prob; + endfunction : get_wresp_stall_prob + + // Get Read Address Channel stall probability + function int get_raddr_stall_prob(); + return raddr_stall_prob; + endfunction : get_raddr_stall_prob + + // Get Read Data Channel stall probability + function int get_rdata_stall_prob(); + return rdata_stall_prob; + endfunction : get_rdata_stall_prob + + + + //--------------------------------------------------------------------------- + // Helper Functions + //--------------------------------------------------------------------------- + + function data_t read_mem(addr_t byte_addr, int num_bytes); + data_t data; + addr_t incr; + + if (BIG_ENDIAN) begin + byte_addr = byte_addr + num_bytes-1; + incr = -1; + end else begin + incr = 1; + end + + for (int i = 0; i < num_bytes; i++) begin + if (byte_addr >= 2**AWIDTH) begin + $fatal(1, "Read extends beyond memory range"); + end + if (memory.exists(byte_addr)) data[i*8 +: 8] = memory[byte_addr]; + else data[i*8 +: 8] = 'X; + byte_addr += incr; + end + + return data; + endfunction : read_mem + + + function void write_mem(addr_t byte_addr, int num_bytes, data_t data, mask_t mask); + addr_t incr; + + if (BIG_ENDIAN) begin + byte_addr = byte_addr + num_bytes-1; + incr = -1; + end else begin + incr = 1; + end + + for (int i = 0; i < num_bytes; i++) begin + if (mask[i]) begin + if (byte_addr >= 2**AWIDTH) begin + $fatal(1, "Write extends beyond memory range"); + end + memory[byte_addr] = data[i*8 +: 8]; + end + byte_addr += incr; + end + endfunction : write_mem + + + //--------------------------------------------------------------------------- + // Write Requests + //--------------------------------------------------------------------------- + + initial begin : write_req_proc + req_t req; + burst_t burst; + + s_axi_awready <= 0; + + 
forever begin + @(posedge s_aclk); + if (!s_aresetn) continue; + + if (s_axi_awvalid) begin + if (s_axi_awready) begin + req.count = req_count; + req.id = s_axi_awid; + req.addr = s_axi_awaddr; + req.len = s_axi_awlen + 1; // Per AXI4 spec, Burst_length = AxLEN[7:0] + 1 + req.size = 2**s_axi_awsize; // Store as true size in bytes, not clog2(size) + req.burst = burst_t'(s_axi_awburst); + + // Check that the request is valid + assert (!$isunknown(req)) else begin + $fatal(1, "Write request signals are unknown"); + end + assert (s_axi_araddr % (DWIDTH/8) == 0) else begin + $warning("Unaligned memory write"); + end + assert (2**s_axi_awsize <= DWIDTH/8) else begin + $fatal(1, "AWSIZE must not be larger than DWIDTH"); + end + assert ($cast(burst, s_axi_awburst)) else begin + $fatal(1, "Invalid AWBURST value"); + end + + if (DEBUG) begin + $display("WRITE REQ: id=%X, addr=%X, len=%X, size=%X, burst=%s, %t, %m", + req.id, req.addr, req.len, req.size, req.burst.name, $realtime); + end + + req_count++; + write_req.put(req); + end + + // Randomly deassert ready + s_axi_awready <= $urandom_range(99) < waddr_stall_prob ? 
0 : 1; + end + end + end : write_req_proc + + + //--------------------------------------------------------------------------- + // Read Requests + //--------------------------------------------------------------------------- + + initial begin : read_req_proc + req_t req; + burst_t burst; + + s_axi_arready <= 0; + + forever begin + @(posedge s_aclk); + if (!s_aresetn) continue; + + if (s_axi_arvalid) begin + if (s_axi_arready) begin + req.count = req_count; + req.id = s_axi_arid; + req.addr = s_axi_araddr; + req.len = s_axi_arlen + 1; // Per AXI4 spec, Burst_length = AxLEN[7:0] + 1 + req.size = 2**s_axi_arsize; // Store as true size in bytes, not clog2(size) + req.burst = burst_t'(s_axi_arburst); + + // Check that the request is valid + assert(!$isunknown(req)) else begin + $fatal(1, "Read request signals are unknown"); + end + assert(s_axi_araddr % (DWIDTH/8) == 0) else begin + $warning("Unaligned memory read"); + end + assert(2**s_axi_arsize <= DWIDTH/8) else begin + $fatal(1, "ARSIZE must not be larger than DWIDTH"); + end + assert ($cast(burst, s_axi_awburst)) else begin + $fatal(1, "Invalid ARBURST value"); + end + + if (DEBUG) begin + $display("READ REQ: id=%X, addr=%X, len=%X, size=%X, burst=%s, %t, %m", + req.id, req.addr, req.len, req.size, req.burst.name, $realtime); + end + + req_count++; + read_req.put(req); + end + + // Randomly deassert ready to cause a stall + s_axi_arready <= $urandom_range(99) < raddr_stall_prob ? 
0 : 1; + end + end + end : read_req_proc + + + //--------------------------------------------------------------------------- + // Write Data + //--------------------------------------------------------------------------- + + initial begin : write_data_proc + req_t req; + bit [AWIDTH-1:0] addr; + + forever begin + // Wait for the next write request + s_axi_wready <= 0; + write_req.get(req); + + // Wait for previous requests to complete + while (compl_count < req.count) begin + @(posedge s_aclk); + if (!s_aresetn) break; + end + + // If reset was asserted, clear the request queue and start over + if (!s_aresetn) begin + while(write_req.try_get(req)); + continue; + end + + // Iterate over the number of words in the request + for (int i = 0; i < req.len; ) begin + @(posedge s_aclk); + if (!s_aresetn) break; + + // Check if we have a new data word + if (s_axi_wvalid) begin + if (s_axi_wready) begin + // Check the inputs + if ($isunknown(s_axi_wstrb)) begin + $fatal(1, "WSTRB is unknown"); + end + if ($isunknown(s_axi_wdata)) begin + // Only $fatal takes a finish_number; $warning takes just the message arguments + $warning("WDATA is unknown; data will be changed to zero"); + end + + case (req.burst) + FIXED : begin + addr = req.addr; + end + INCR : begin + // If the address rolls over, we've reached the end of the + // memory and we should stop here. + addr = req.addr + i*req.size; + if (addr < req.addr) break; + end + WRAP : begin + // Allow roll-over + addr = req.addr + i*req.size; + end + endcase + + write_mem(addr, req.size, s_axi_wdata, s_axi_wstrb); + + if (DEBUG) begin + $display("WRITE: count=%3X, ADDR=%X, DATA=%X, SIZE=%X, STRB=%X, %t, %m", + i, addr, s_axi_wdata, req.size, s_axi_wstrb, $realtime); + end + + i++; + end + + // Randomly deassert ready to cause a stall + s_axi_wready <= $urandom_range(99) < wdata_stall_prob ?
0 : 1; + end + end // for + + // If reset was asserted, clear the request queue and start over + if (!s_aresetn) begin + while(write_req.try_get(req)); + continue; + end + + compl_count++; + + // Enqueue write response + write_resp.put(req); + + // Make sure WLAST asserted for the last word. If not we report an error. + // Per the AXI4 standard, "a slave is not required to use the WLAST + // signal" because "a slave can calculate the last write data transfer + // from the burst length AWLEN". + if (s_axi_wlast != 1'b1) begin + $error("WLAST not asserted on last word of burst"); + end + + end // forever + end : write_data_proc + + + //--------------------------------------------------------------------------- + // Write Response + //--------------------------------------------------------------------------- + + initial begin : write_resp_proc + req_t resp; + bit [AWIDTH-1:0] addr; + + forever begin + s_axi_bid <= 'X; + s_axi_bresp <= 'X; + s_axi_bvalid <= 0; + + // Wait for the next write response + write_resp.get(resp); + @(posedge s_aclk); + + // If there's a reset, clear the response queue and start over + if (!s_aresetn) begin + while(write_resp.try_get(resp)); + continue; + end + + // Randomly keep bvalid deasserted for next word to cause a stall + if ($urandom_range(99) < wresp_stall_prob) begin + do begin + @(posedge s_aclk); + if (!s_aresetn) break; + end while ($urandom_range(99) < wresp_stall_prob); + + // If reset was asserted, clear the response queue and start over + if (!s_aresetn) begin + while(write_resp.try_get(resp)); + continue; + end + end + + // Output the next response + s_axi_bid <= resp.id; + s_axi_bresp <= OKAY; + s_axi_bvalid <= 1; + + if (DEBUG) begin + $display("WRITE RESP: ID=%X, %t, %m", resp.id, $realtime); + end + + // Wait for the response to be accepted + do begin + @(posedge s_aclk); + if (!s_aresetn) break; + end while (!s_axi_bready); + + // Output the next response + s_axi_bid <= 'X; + s_axi_bresp <= 'X; + s_axi_bvalid <= 0; + + 
// If reset was asserted, clear the response queue and start over + if (!s_aresetn) begin + while(write_resp.try_get(resp)); + continue; + end + end // forever + end : write_resp_proc + + + //--------------------------------------------------------------------------- + // Read Data + //--------------------------------------------------------------------------- + + initial begin : read_data_proc + req_t req; + bit [AWIDTH-1:0] addr; + logic [DWIDTH-1:0] data; + + forever begin + s_axi_rid <= 'X; + s_axi_rdata <= 'X; + s_axi_rresp <= 'X; + s_axi_rlast <= 'X; + s_axi_rvalid <= 0; + + // Wait for the next read request + read_req.get(req); + + // Wait for previous requests to complete + do begin + @(posedge s_aclk); + if (!s_aresetn) break; + end while (compl_count < req.count); + + // If reset was asserted, clear the request queue and start over + if (!s_aresetn) begin + while(read_req.try_get(req)); + continue; + end + + for (int i = 0; i < req.len; i++) begin + // Randomly keep rvalid deasserted for next word to cause a stall + if ($urandom_range(99) < rdata_stall_prob) begin + do begin + @(posedge s_aclk); + if (!s_aresetn) break; + end while ($urandom_range(99) < rdata_stall_prob); + if (!s_aresetn) break; + end + + case (req.burst) + FIXED : begin + addr = req.addr; + end + INCR : begin + // If the address rolls over, we've reached the end of the memory + // and we should stop here. 
+ addr = req.addr + i*req.size; + if (addr < req.addr) break; + end + WRAP : begin + // Allow roll-over + addr = req.addr + i*req.size; + end + endcase + + // Read the memory + data = read_mem(addr, req.size); + + // Output the next word + s_axi_rid <= req.id; + s_axi_rdata <= data; + s_axi_rresp <= OKAY; + s_axi_rlast <= (i == req.len-1); + s_axi_rvalid <= 1; + + if (DEBUG) begin + $display("READ: count=%3X, ADDR=%X, DATA=%X, SIZE=%X, %t, %m", i, addr, data, req.size, $realtime); + end + + // Wait for the word to be captured + do begin + @(posedge s_aclk); + if (!s_aresetn) break; + end while (!s_axi_rready); + + s_axi_rid <= 'X; + s_axi_rdata <= 'X; + s_axi_rresp <= 'X; + s_axi_rlast <= 'X; + s_axi_rvalid <= 0; + end // for + + // If reset was asserted, clear the request queue and start over + if (!s_aresetn) begin + while(read_req.try_get(req)); + end + + compl_count++; + + end // forever + end : read_data_proc + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile new file mode 100644 index 000000000..d574c9a01 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile @@ -0,0 +1,68 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# IP Specific +#------------------------------------------------- +# If simulation contains IP, define the IP_DIR and point +# it to the base level IP directory +LIB_IP_DIR = $(BASE_DIR)/../lib/ip + +# Include makefiles and sources for all IP components +# *after* defining the LIB_IP_DIR +#include 
$(LIB_IP_DIR)/axi_fft/Makefile.inc +#include $(LIB_IP_DIR)/complex_to_magphase/Makefile.inc +include $(LIB_IP_DIR)/complex_multiplier_dds/Makefile.inc +include $(LIB_IP_DIR)/dds_sin_cos_lut_only/Makefile.inc +include $(BASE_DIR)/x300/coregen_dsp/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(LIB_IP_COMPLEX_MULTIPLIER_DDS_SRCS) \ +$(LIB_IP_DDS_SIN_COS_LUT_ONLY_SRCS) \ +$(COREGEN_DSP_SRCS) \ +) + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_BLOCK_DDC_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = rfnoc_block_ddc_tb + +# Add test bench, user design under test, and +# additional user created files +SIM_SRCS = \ +$(COREGEN_DSP_SRCS) \ +$(abspath rfnoc_block_ddc_tb.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile.srcs new file mode 100644 index 000000000..28663f03c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/Makefile.srcs @@ -0,0 +1,11 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +RFNOC_BLOCK_DDC_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_ddc/, \ +noc_shell_ddc.v \ +rfnoc_block_ddc_regs.vh \ +rfnoc_block_ddc.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/noc_shell_ddc.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/noc_shell_ddc.v new file mode 100644 index 000000000..56a13ee0a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/noc_shell_ddc.v @@ -0,0 +1,291 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: noc_shell_ddc +// +// Description: A NoC Shell for RFNoC. This should eventually be replaced +// by an auto-generated NoC Shell. 
+// + +module noc_shell_ddc #( + parameter [31:0] NOC_ID = 32'h0, + parameter [ 9:0] THIS_PORTID = 10'd0, + parameter CHDR_W = 64, + parameter [ 0:0] CTRLPORT_SLV_EN = 1, + parameter [ 0:0] CTRLPORT_MST_EN = 1, + parameter [ 5:0] CTRL_FIFO_SIZE = 6, + parameter [ 5:0] NUM_DATA_I = 1, + parameter [ 5:0] NUM_DATA_O = 1, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter PYLD_FIFO_SIZE = 10, + parameter MTU = 10 +)( + //--------------------------------------------------------------------------- + // Framework Interface + //--------------------------------------------------------------------------- + + // RFNoC Framework Clocks and Resets + input wire rfnoc_chdr_clk, + output wire rfnoc_chdr_rst, + input wire rfnoc_ctrl_clk, + output wire rfnoc_ctrl_rst, + // RFNoC Backend Interface + input wire [ 511:0] rfnoc_core_config, + output wire [ 511:0] rfnoc_core_status, + // CHDR Input Ports (from framework) + input wire [(CHDR_W*NUM_DATA_I)-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tready, + // CHDR Output Ports (to framework) + output wire [(CHDR_W*NUM_DATA_O)-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tready, + // AXIS-Ctrl Input Port (from framework) + input wire [ 31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + // AXIS-Ctrl Output Port (to framework) + output wire [ 31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + //--------------------------------------------------------------------------- + // Client Control Port Interface + //--------------------------------------------------------------------------- + + // Clock + input wire ctrlport_clk, 
+ input wire ctrlport_rst, + // Master + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Client Data Interface + //--------------------------------------------------------------------------- + + // Clock + input wire axis_data_clk, + input wire axis_data_rst, + + // Output data stream (to user logic) + output wire [(NUM_DATA_I*ITEM_W*NIPC)-1:0] m_axis_tdata, + output wire [ (NUM_DATA_I*NIPC)-1:0] m_axis_tkeep, + output wire [ NUM_DATA_I-1:0] m_axis_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_tready, + // Sideband information + output wire [ (NUM_DATA_I*64)-1:0] m_axis_ttimestamp, + output wire [ NUM_DATA_I-1:0] m_axis_thas_time, + output wire [ (NUM_DATA_I*16)-1:0] m_axis_tlength, + output wire [ NUM_DATA_I-1:0] m_axis_teov, + output wire [ NUM_DATA_I-1:0] m_axis_teob, + + // Input data stream (from user logic) + input wire [(NUM_DATA_O*ITEM_W*NIPC)-1:0] s_axis_tdata, + input wire [ (NUM_DATA_O*NIPC)-1:0] s_axis_tkeep, + input wire [ NUM_DATA_O-1:0] s_axis_tlast, + 
input wire [ NUM_DATA_O-1:0] s_axis_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_tready, + // Sideband info (sampled on the first cycle of the packet) + input wire [ (NUM_DATA_O*64)-1:0] s_axis_ttimestamp, + input wire [ NUM_DATA_O-1:0] s_axis_thas_time, + input wire [ NUM_DATA_O-1:0] s_axis_teov, + input wire [ NUM_DATA_O-1:0] s_axis_teob +); + + localparam SNK_INFO_FIFO_SIZE = 4; + localparam SNK_PYLD_FIFO_SIZE = PYLD_FIFO_SIZE; + localparam SRC_INFO_FIFO_SIZE = 4; + localparam SRC_PYLD_FIFO_SIZE = (MTU > PYLD_FIFO_SIZE) ? MTU : PYLD_FIFO_SIZE; + + //--------------------------------------------------------------------------- + // Backend Interface + //--------------------------------------------------------------------------- + + wire data_i_flush_en; + wire [31:0] data_i_flush_timeout; + wire [63:0] data_i_flush_active; + wire [63:0] data_i_flush_done; + wire data_o_flush_en; + wire [31:0] data_o_flush_timeout; + wire [63:0] data_o_flush_active; + wire [63:0] data_o_flush_done; + + backend_iface #( + .NOC_ID (NOC_ID), + .NUM_DATA_I (NUM_DATA_I), + .NUM_DATA_O (NUM_DATA_O), + .CTRL_FIFOSIZE (CTRL_FIFO_SIZE), + .MTU (MTU) + ) backend_iface_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst), + .data_i_flush_en (data_i_flush_en), + .data_i_flush_timeout (data_i_flush_timeout), + .data_i_flush_active (data_i_flush_active), + .data_i_flush_done (data_i_flush_done), + .data_o_flush_en (data_o_flush_en), + .data_o_flush_timeout (data_o_flush_timeout), + .data_o_flush_active (data_o_flush_active), + .data_o_flush_done (data_o_flush_done) + ); + + //--------------------------------------------------------------------------- + // Control Path + //--------------------------------------------------------------------------- + + ctrlport_endpoint #( + .THIS_PORTID (THIS_PORTID ), + .SYNC_CLKS (0 ), 
+ .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN), + .AXIS_CTRL_SLV_EN (CTRLPORT_MST_EN), + .SLAVE_FIFO_SIZE (CTRL_FIFO_SIZE ) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .m_ctrlport_req_wr (m_ctrlport_req_wr ), + .m_ctrlport_req_rd (m_ctrlport_req_rd ), + .m_ctrlport_req_addr (m_ctrlport_req_addr ), + .m_ctrlport_req_data (m_ctrlport_req_data ), + .m_ctrlport_req_byte_en (m_ctrlport_req_byte_en ), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time ), + .m_ctrlport_req_time (m_ctrlport_req_time ), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack ), + .m_ctrlport_resp_status (m_ctrlport_resp_status ), + .m_ctrlport_resp_data (m_ctrlport_resp_data ), + .s_ctrlport_req_wr (s_ctrlport_req_wr ), + .s_ctrlport_req_rd (s_ctrlport_req_rd ), + .s_ctrlport_req_addr (s_ctrlport_req_addr ), + .s_ctrlport_req_portid (s_ctrlport_req_portid ), + .s_ctrlport_req_rem_epid (s_ctrlport_req_rem_epid ), + .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .s_ctrlport_req_data (s_ctrlport_req_data ), + .s_ctrlport_req_byte_en (s_ctrlport_req_byte_en ), + .s_ctrlport_req_has_time (s_ctrlport_req_has_time ), + .s_ctrlport_req_time (s_ctrlport_req_time ), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack ), + .s_ctrlport_resp_status (s_ctrlport_resp_status ), + .s_ctrlport_resp_data (s_ctrlport_resp_data ) + ); + + //--------------------------------------------------------------------------- + // Data Path + //--------------------------------------------------------------------------- + + genvar i; + generate + + for (i = 0; i < 
NUM_DATA_I; i = i + 1) begin: chdr_to_data + chdr_to_axis_data #( + .CHDR_W (CHDR_W), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .SYNC_CLKS (0), + .INFO_FIFO_SIZE (SNK_INFO_FIFO_SIZE), + .PYLD_FIFO_SIZE (SNK_PYLD_FIFO_SIZE) + ) chdr_to_axis_data_i ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (axis_data_clk), + .axis_data_rst (axis_data_rst), + .s_axis_chdr_tdata (s_rfnoc_chdr_tdata [(i*CHDR_W)+:CHDR_W]), + .s_axis_chdr_tlast (s_rfnoc_chdr_tlast [i]), + .s_axis_chdr_tvalid (s_rfnoc_chdr_tvalid [i]), + .s_axis_chdr_tready (s_rfnoc_chdr_tready [i]), + .m_axis_tdata (m_axis_tdata [i*ITEM_W*NIPC +: ITEM_W*NIPC]), + .m_axis_tkeep (m_axis_tkeep [i*NIPC +: NIPC]), + .m_axis_tlast (m_axis_tlast [i]), + .m_axis_tvalid (m_axis_tvalid [i]), + .m_axis_tready (m_axis_tready [i]), + .m_axis_ttimestamp (m_axis_ttimestamp [i*64 +: 64]), + .m_axis_thas_time (m_axis_thas_time [i]), + .m_axis_tlength (m_axis_tlength [i*16 +: 16]), + .m_axis_teov (m_axis_teov [i]), + .m_axis_teob (m_axis_teob [i]), + .flush_en (data_i_flush_en), + .flush_timeout (data_i_flush_timeout), + .flush_active (data_i_flush_active [i]), + .flush_done (data_i_flush_done [i]) + ); + end + + for (i = 0; i < NUM_DATA_O; i = i + 1) begin: data_to_chdr + /* NOTE(review): unlike the chdr_to_data instance, the parameters below look offset by one relative to the SRC_* localparams: INFO_FIFO_SIZE is a literal 4, PYLD_FIFO_SIZE receives SRC_INFO_FIFO_SIZE, and MTU receives SRC_PYLD_FIFO_SIZE (the larger of MTU and PYLD_FIFO_SIZE). Confirm this mapping is intended before reusing this shell with PYLD_FIFO_SIZE != MTU. */ axis_data_to_chdr #( + .CHDR_W (CHDR_W), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .SYNC_CLKS (0), + .INFO_FIFO_SIZE (4), + .PYLD_FIFO_SIZE (SRC_INFO_FIFO_SIZE), + .MTU (SRC_PYLD_FIFO_SIZE) + ) axis_data_to_chdr_i ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (axis_data_clk), + .axis_data_rst (axis_data_rst), + .m_axis_chdr_tdata (m_rfnoc_chdr_tdata [i*CHDR_W +: CHDR_W]), + .m_axis_chdr_tlast (m_rfnoc_chdr_tlast [i]), + .m_axis_chdr_tvalid (m_rfnoc_chdr_tvalid [i]), + .m_axis_chdr_tready (m_rfnoc_chdr_tready [i]), + .s_axis_tdata (s_axis_tdata [i*ITEM_W*NIPC +: ITEM_W*NIPC]), + .s_axis_tkeep (s_axis_tkeep [i*NIPC +: NIPC]), + .s_axis_tlast (s_axis_tlast [i]), + .s_axis_tvalid (s_axis_tvalid
[i]), + .s_axis_tready (s_axis_tready [i]), + .s_axis_ttimestamp (s_axis_ttimestamp [i*64 +: 64]), + .s_axis_thas_time (s_axis_thas_time [i]), + .s_axis_teov (s_axis_teov [i]), + .s_axis_teob (s_axis_teob [i]), + .flush_en (data_o_flush_en), + .flush_timeout (data_o_flush_timeout), + .flush_active (data_o_flush_active [i]), + .flush_done (data_o_flush_done [i]) + ); + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc.v new file mode 100644 index 000000000..3162743b6 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc.v @@ -0,0 +1,420 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_ddc +// +// Description: An digital down-converter block for RFNoC. +// +// Parameters: +// +// THIS_PORTID : Control crossbar port to which this block is connected +// CHDR_W : AXIS CHDR interface data width +// NUM_PORTS : Number of DDCs to instantiate +// MTU : Maximum transmission unit (i.e., maximum packet size) in +// CHDR words is 2**MTU. +// CTRL_FIFO_SIZE : Size of the Control Port slave FIFO. This affects the +// number of outstanding commands that can be pending. 
+// NUM_HB : Number of half-band decimation blocks to include (0-3) +// CIC_MAX_DECIM : Maximum decimation to support in the CIC filter +// + +module rfnoc_block_ddc #( + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter NUM_PORTS = 2, + parameter MTU = 10, + parameter CTRL_FIFO_SIZE = 6, + parameter NUM_HB = 3, + parameter CIC_MAX_DECIM = 255 +) ( + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + input wire ce_clk, + + // CHDR inputs from framework + input wire [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready +); + + // These are the only supported values for now + localparam ITEM_W = 32; + localparam NIPC = 1; + + localparam NOC_ID = 'hDDC0_0000; + + localparam COMPAT_MAJOR = 16'h0; + localparam COMPAT_MINOR = 16'h0; + + `include 
"rfnoc_block_ddc_regs.vh" + `include "../../core/rfnoc_axis_ctrl_utils.vh" + + + //--------------------------------------------------------------------------- + // Signal Declarations + //--------------------------------------------------------------------------- + + wire rfnoc_chdr_rst; + + wire ctrlport_req_wr; + wire ctrlport_req_rd; + wire [19:0] ctrlport_req_addr; + wire [31:0] ctrlport_req_data; + wire ctrlport_req_has_time; + wire [63:0] ctrlport_req_time; + wire ctrlport_resp_ack; + wire [31:0] ctrlport_resp_data; + + wire [NUM_PORTS*ITEM_W-1:0] m_axis_data_tdata; + wire [ NUM_PORTS-1:0] m_axis_data_tlast; + wire [ NUM_PORTS-1:0] m_axis_data_tvalid; + wire [ NUM_PORTS-1:0] m_axis_data_tready; + wire [ NUM_PORTS*64-1:0] m_axis_data_ttimestamp; + wire [ NUM_PORTS-1:0] m_axis_data_thas_time; + wire [ 16*NUM_PORTS-1:0] m_axis_data_tlength; + wire [ NUM_PORTS-1:0] m_axis_data_teob; + wire [ NUM_PORTS*128-1:0] m_axis_data_tuser; + + wire [NUM_PORTS*ITEM_W-1:0] s_axis_data_tdata; + wire [ NUM_PORTS-1:0] s_axis_data_tlast; + wire [ NUM_PORTS-1:0] s_axis_data_tvalid; + wire [ NUM_PORTS-1:0] s_axis_data_tready; + wire [ NUM_PORTS*128-1:0] s_axis_data_tuser; + wire [ NUM_PORTS-1:0] s_axis_data_teob; + wire [ NUM_PORTS*64-1:0] s_axis_data_ttimestamp; + wire [ NUM_PORTS-1:0] s_axis_data_thas_time; + + wire ddc_rst; + + // Cross the CHDR reset to the ce_clk domain + synchronizer ddc_rst_sync_i ( + .clk (ce_clk), + .rst (1'b0), + .in (rfnoc_chdr_rst), + .out (ddc_rst) + ); + + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + noc_shell_ddc #( + .NOC_ID (NOC_ID), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .CTRLPORT_SLV_EN (0), + .CTRLPORT_MST_EN (1), + .CTRL_FIFO_SIZE (CTRL_FIFO_SIZE), + .NUM_DATA_I (NUM_PORTS), + .NUM_DATA_O (NUM_PORTS), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .PYLD_FIFO_SIZE (MTU), + .MTU (MTU) + ) noc_shell_ddc_i ( + 
.rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_ctrl_rst (), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready), + .ctrlport_clk (ce_clk), + .ctrlport_rst (ddc_rst), + .m_ctrlport_req_wr (ctrlport_req_wr), + .m_ctrlport_req_rd (ctrlport_req_rd), + .m_ctrlport_req_addr (ctrlport_req_addr), + .m_ctrlport_req_data (ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (ctrlport_req_has_time), + .m_ctrlport_req_time (ctrlport_req_time), + .m_ctrlport_resp_ack (ctrlport_resp_ack), + .m_ctrlport_resp_status (AXIS_CTRL_STS_OKAY), + .m_ctrlport_resp_data (ctrlport_resp_data), + .s_ctrlport_req_wr (1'b0), + .s_ctrlport_req_rd (1'b0), + .s_ctrlport_req_addr (20'b0), + .s_ctrlport_req_portid (10'b0), + .s_ctrlport_req_rem_epid (16'b0), + .s_ctrlport_req_rem_portid (10'b0), + .s_ctrlport_req_data (32'b0), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .axis_data_clk (ce_clk), + .axis_data_rst (ddc_rst), + .m_axis_tdata (m_axis_data_tdata), + .m_axis_tkeep (), + .m_axis_tlast (m_axis_data_tlast), + .m_axis_tvalid 
(m_axis_data_tvalid), + .m_axis_tready (m_axis_data_tready), + .m_axis_ttimestamp (m_axis_data_ttimestamp), + .m_axis_thas_time (m_axis_data_thas_time), + .m_axis_tlength (m_axis_data_tlength), + .m_axis_teov (), + .m_axis_teob (m_axis_data_teob), + .s_axis_tdata (s_axis_data_tdata), + .s_axis_tkeep ({NUM_PORTS*NIPC{1'b1}}), + .s_axis_tlast (s_axis_data_tlast), + .s_axis_tvalid (s_axis_data_tvalid), + .s_axis_tready (s_axis_data_tready), + .s_axis_ttimestamp (s_axis_data_ttimestamp), + .s_axis_thas_time (s_axis_data_thas_time), + .s_axis_teov ({NUM_PORTS{1'b0}}), + .s_axis_teob (s_axis_data_teob) + ); + + + //--------------------------------------------------------------------------- + // Register Translation + //--------------------------------------------------------------------------- + // + // Each DDC block is allocated an address spaces. This block translates CTRL + // port transactions in that space to settings bus. + // + //--------------------------------------------------------------------------- + + wire [ 8*NUM_PORTS-1:0] set_addr; + wire [32*NUM_PORTS-1:0] set_data; + wire [ NUM_PORTS-1:0] set_has_time; + wire [ NUM_PORTS-1:0] set_stb; + wire [64*NUM_PORTS-1:0] set_time; + wire [ 8*NUM_PORTS-1:0] rb_addr; + reg [64*NUM_PORTS-1:0] rb_data; + wire [ NUM_PORTS-1:0] rb_stb; + + ctrlport_to_settings_bus # ( + .NUM_PORTS (NUM_PORTS) + ) ctrlport_to_settings_bus_i ( + .ctrlport_clk (ce_clk), + .ctrlport_rst (ddc_rst), + .s_ctrlport_req_wr (ctrlport_req_wr), + .s_ctrlport_req_rd (ctrlport_req_rd), + .s_ctrlport_req_addr (ctrlport_req_addr), + .s_ctrlport_req_data (ctrlport_req_data), + .s_ctrlport_req_has_time (ctrlport_req_has_time), + .s_ctrlport_req_time (ctrlport_req_time), + .s_ctrlport_resp_ack (ctrlport_resp_ack), + .s_ctrlport_resp_data (ctrlport_resp_data), + .set_data (set_data), + .set_addr (set_addr), + .set_stb (set_stb), + .set_time (set_time), + .set_has_time (set_has_time), + .rb_stb (rb_stb), + .rb_addr (rb_addr), + .rb_data (rb_data)); + + + 
//--------------------------------------------------------------------------- + // DDC Implementation + //--------------------------------------------------------------------------- + + // Unused signals + wire [ NUM_PORTS-1:0] clear_tx_seqnum = 0; + wire [16*NUM_PORTS-1:0] src_sid = 0; + wire [16*NUM_PORTS-1:0] next_dst_sid = 0; + + // Maximum total decimation, CIC_MAX_DECIM * 2**NUM_HB. Expressed as a plain + // left shift so that NUM_HB = 0 does not yield a negative shift amount. + localparam MAX_N = CIC_MAX_DECIM << NUM_HB; + + genvar i; + generate + for (i = 0; i < NUM_PORTS; i = i + 1) begin : gen_ddc_chains + wire set_stb_int = set_stb[i]; + wire [7:0] set_addr_int = set_addr[8*i+7:8*i]; + wire [31:0] set_data_int = set_data[32*i+31:32*i]; + wire [63:0] set_time_int = set_time[64*i+63:64*i]; + wire set_has_time_int = set_has_time[i]; + + // Build the expected tuser CHDR header + cvita_hdr_encoder cvita_hdr_encoder_i ( + .pkt_type (2'b0), + .eob (m_axis_data_teob[i]), + .has_time (m_axis_data_thas_time[i]), + .seqnum (12'b0), + .payload_length (m_axis_data_tlength[16*i +: 16]), + .src_sid (16'b0), + .dst_sid (16'b0), + .vita_time (m_axis_data_ttimestamp[64*i +: 64]), + .header (m_axis_data_tuser[128*i+:128]) + ); + + // Extract bit fields from outgoing tuser CHDR header + assign s_axis_data_teob[i] = s_axis_data_tuser[128*i+124 +: 1]; + assign s_axis_data_thas_time[i] = s_axis_data_tuser[128*i+125 +: 1]; + assign s_axis_data_ttimestamp[64*i+:64] = s_axis_data_tuser[128*i+ 0 +: 64]; + + // TODO: Read-back register for number of FIR filter taps + // Combinational read-back mux for this port, so blocking assignments are used + always @(*) begin + case(rb_addr[8*i+7:8*i]) + RB_COMPAT_NUM : rb_data[64*i+63:64*i] = {COMPAT_MAJOR, COMPAT_MINOR}; + RB_NUM_HB : rb_data[64*i+63:64*i] = NUM_HB; + RB_CIC_MAX_DECIM : rb_data[64*i+63:64*i] = CIC_MAX_DECIM; + default : rb_data[64*i+63:64*i] = 64'h0BADC0DE0BADC0DE; + endcase + end + + //////////////////////////////////////////////////////////// + // + // Timed Commands + // + //////////////////////////////////////////////////////////// + wire [31:0] m_axis_tagged_tdata; + wire m_axis_tagged_tlast; + wire m_axis_tagged_tvalid; + wire 
m_axis_tagged_tready; + wire [127:0] m_axis_tagged_tuser; + wire m_axis_tagged_tag; + + wire out_set_stb; + wire [7:0] out_set_addr; + wire [31:0] out_set_data; + wire timed_set_stb; + wire [7:0] timed_set_addr; + wire [31:0] timed_set_data; + + wire timed_cmd_fifo_full; + + axi_tag_time #( + .NUM_TAGS(1), + .SR_TAG_ADDRS(SR_FREQ_ADDR)) + axi_tag_time ( + .clk(ce_clk), + .reset(ddc_rst), + .clear(clear_tx_seqnum[i]), + .tick_rate(16'd1), + .timed_cmd_fifo_full(timed_cmd_fifo_full), + .s_axis_data_tdata(m_axis_data_tdata[i*ITEM_W+:ITEM_W]), .s_axis_data_tlast(m_axis_data_tlast[i]), + .s_axis_data_tvalid(m_axis_data_tvalid[i]), .s_axis_data_tready(m_axis_data_tready[i]), + .s_axis_data_tuser(m_axis_data_tuser[128*i+:128]), + .m_axis_data_tdata(m_axis_tagged_tdata), .m_axis_data_tlast(m_axis_tagged_tlast), + .m_axis_data_tvalid(m_axis_tagged_tvalid), .m_axis_data_tready(m_axis_tagged_tready), + .m_axis_data_tuser(m_axis_tagged_tuser), .m_axis_data_tag(m_axis_tagged_tag), + .in_set_stb(set_stb_int), .in_set_addr(set_addr_int), .in_set_data(set_data_int), + .in_set_time(set_time_int), .in_set_has_time(set_has_time_int), + .out_set_stb(out_set_stb), .out_set_addr(out_set_addr), .out_set_data(out_set_data), + .timed_set_stb(timed_set_stb), .timed_set_addr(timed_set_addr), .timed_set_data(timed_set_data)); + + // Hold off reading additional commands if internal FIFO is full + assign rb_stb[i] = ~timed_cmd_fifo_full; + + //////////////////////////////////////////////////////////// + // + // Reduce Rate + // + //////////////////////////////////////////////////////////// + wire [31:0] sample_in_tdata, sample_out_tdata; + wire sample_in_tuser, sample_in_eob; + wire sample_in_tvalid, sample_in_tready, sample_in_tlast; + wire sample_out_tvalid, sample_out_tready; + wire clear_user; + wire nc; + axi_rate_change #( + .WIDTH(33), + .MAX_N(MAX_N), + .MAX_M(1), + .SR_N_ADDR(SR_N_ADDR), + .SR_M_ADDR(SR_M_ADDR), + .SR_CONFIG_ADDR(SR_CONFIG_ADDR)) + axi_rate_change ( + .clk(ce_clk), 
.reset(ddc_rst), .clear(clear_tx_seqnum[i]), .clear_user(clear_user), + .src_sid(src_sid[16*i+15:16*i]), .dst_sid(next_dst_sid[16*i+15:16*i]), + .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data), + .i_tdata({m_axis_tagged_tag,m_axis_tagged_tdata}), .i_tlast(m_axis_tagged_tlast), + .i_tvalid(m_axis_tagged_tvalid), .i_tready(m_axis_tagged_tready), + .i_tuser(m_axis_tagged_tuser), + .o_tdata({nc,s_axis_data_tdata[i*ITEM_W+:ITEM_W]}), .o_tlast(s_axis_data_tlast[i]), .o_tvalid(s_axis_data_tvalid[i]), + .o_tready(s_axis_data_tready[i]), .o_tuser(s_axis_data_tuser[128*i+:128]), + .m_axis_data_tdata({sample_in_tuser,sample_in_tdata}), .m_axis_data_tlast(sample_in_tlast), + .m_axis_data_tvalid(sample_in_tvalid), .m_axis_data_tready(sample_in_tready), + .s_axis_data_tdata({1'b0,sample_out_tdata}), .s_axis_data_tlast(1'b0), + .s_axis_data_tvalid(sample_out_tvalid), .s_axis_data_tready(sample_out_tready), + .warning_long_throttle(), + .error_extra_outputs(), + .error_drop_pkt_lockup()); + + assign sample_in_eob = m_axis_tagged_tuser[124]; //this should align with last packet output from axi_rate_change + + //////////////////////////////////////////////////////////// + // + // Digital Down Converter + // + //////////////////////////////////////////////////////////// + + ddc #( + .SR_FREQ_ADDR(SR_FREQ_ADDR), + .SR_SCALE_IQ_ADDR(SR_SCALE_IQ_ADDR), + .SR_DECIM_ADDR(SR_DECIM_ADDR), + .SR_MUX_ADDR(SR_MUX_ADDR), + .SR_COEFFS_ADDR(SR_COEFFS_ADDR), + .NUM_HB(NUM_HB), + .CIC_MAX_DECIM(CIC_MAX_DECIM)) + ddc ( + .clk(ce_clk), .reset(ddc_rst), + .clear(clear_user | clear_tx_seqnum[i]), // Use AXI Rate Change's clear user to reset block to initial state after EOB + .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data), + .timed_set_stb(timed_set_stb), .timed_set_addr(timed_set_addr), .timed_set_data(timed_set_data), + .sample_in_tdata(sample_in_tdata), .sample_in_tlast(sample_in_tlast), + .sample_in_tvalid(sample_in_tvalid), 
.sample_in_tready(sample_in_tready), + .sample_in_tuser(sample_in_tuser), .sample_in_eob(sample_in_eob), + .sample_out_tdata(sample_out_tdata), .sample_out_tlast(), + .sample_out_tvalid(sample_out_tvalid), .sample_out_tready(sample_out_tready) + ); + + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_regs.vh b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_regs.vh new file mode 100644 index 000000000..bc1bf4c46 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_regs.vh @@ -0,0 +1,27 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_ddc_regs (Header) +// +// Description: Header file for RFNoC DDC functionality. This includes +// register offsets, bitfields and constants for the radio components. +// + +// For now, these offsets match the original DDC +localparam DDC_BASE_ADDR = 'h00; +localparam DDC_ADDR_W = 8; + +localparam RB_COMPAT_NUM = 0; +localparam RB_NUM_HB = 1; +localparam RB_CIC_MAX_DECIM = 2; +localparam SR_N_ADDR = 128; +localparam SR_M_ADDR = 129; +localparam SR_CONFIG_ADDR = 130; +localparam SR_FREQ_ADDR = 132; +localparam SR_SCALE_IQ_ADDR = 133; +localparam SR_DECIM_ADDR = 134; +localparam SR_MUX_ADDR = 135; +localparam SR_COEFFS_ADDR = 136; + diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_tb.sv new file mode 100644 index 000000000..8b0790909 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_ddc/rfnoc_block_ddc_tb.sv @@ -0,0 +1,386 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_ddc_tb +// +// Description: Testbench for rfnoc_block_ddc +// + + +module rfnoc_block_ddc_tb(); + + // Include macros and time declarations for use with PkgTestExec + `include 
"test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + + `include "rfnoc_block_ddc_regs.vh" + + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Simulation parameters + localparam real CHDR_CLK_PER = 5.0; // CHDR clock rate + localparam real DDC_CLK_PER = 4.0; // DUC IP clock rate + localparam int EXTENDED_TEST = 0; // Perform a longer test + localparam int SPP = 256; // Samples per packet + localparam int PKT_SIZE_BYTES = SPP*4; // Bytes per packet + localparam int STALL_PROB = 25; // BFM stall probability + + // Block configuration + localparam int CHDR_W = 64; + localparam int THIS_PORTID = 'h123; + localparam int MTU = 8; + localparam int NUM_PORTS = 1; + localparam int NUM_HB = 3; + localparam int CIC_MAX_DECIM = 255; + + + //--------------------------------------------------------------------------- + // Clocks + //--------------------------------------------------------------------------- + + bit rfnoc_chdr_clk; + bit rfnoc_ctrl_clk; + bit ce_clk; + + sim_clock_gen #(CHDR_CLK_PER) rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst()); + sim_clock_gen #(CHDR_CLK_PER) rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst()); + sim_clock_gen #(DDC_CLK_PER) ddc_clk_gen (.clk(ce_clk), .rst()); + + + //--------------------------------------------------------------------------- + // Bus Functional Models + //--------------------------------------------------------------------------- + + RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_ctrl_clk); + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(CHDR_W) m_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + AxiStreamIf #(CHDR_W) s_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl = + 
new(backend, m_ctrl, s_ctrl); + + // Connect block controller to BFMs + for (genvar i = 0; i < NUM_PORTS; i++) begin : gen_bfm_connections + initial begin + blk_ctrl.connect_master_data_port(i, m_chdr[i], PKT_SIZE_BYTES); + blk_ctrl.connect_slave_data_port(i, s_chdr[i]); + blk_ctrl.set_master_stall_prob(i, STALL_PROB); + blk_ctrl.set_slave_stall_prob(i, STALL_PROB); + end + end + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tready; + + logic [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tready; + + // Map the array of BFMs to a flat vector for the DUT + genvar i; + for (i = 0; i < NUM_PORTS; i++) begin : gen_dut_connections + // Connect BFM master to DUT slave port + assign s_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W] = m_chdr[i].tdata; + assign s_rfnoc_chdr_tlast[i] = m_chdr[i].tlast; + assign s_rfnoc_chdr_tvalid[i] = m_chdr[i].tvalid; + assign m_chdr[i].tready = s_rfnoc_chdr_tready[i]; + + // Connect BFM slave to DUT master port + assign s_chdr[i].tdata = m_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W]; + assign s_chdr[i].tlast = m_rfnoc_chdr_tlast[i]; + assign s_chdr[i].tvalid = m_rfnoc_chdr_tvalid[i]; + assign m_rfnoc_chdr_tready[i] = s_chdr[i].tready; + end + + rfnoc_block_ddc #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NUM_PORTS (NUM_PORTS), + .MTU (MTU), + .NUM_HB (NUM_HB), + .CIC_MAX_DECIM (CIC_MAX_DECIM) + ) rfnoc_block_ddc_i ( + .rfnoc_chdr_clk (backend.chdr_clk), + .ce_clk (ce_clk), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + 
.s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .rfnoc_core_config (backend.cfg), + .rfnoc_core_status (backend.sts), + .rfnoc_ctrl_clk (backend.ctrl_clk), + .s_rfnoc_ctrl_tdata (m_ctrl.tdata), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast), + .s_rfnoc_ctrl_tvalid (m_ctrl.tvalid), + .s_rfnoc_ctrl_tready (m_ctrl.tready), + .m_rfnoc_ctrl_tdata (s_ctrl.tdata), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast), + .m_rfnoc_ctrl_tvalid (s_ctrl.tvalid), + .m_rfnoc_ctrl_tready (s_ctrl.tready) + ); + + + //--------------------------------------------------------------------------- + // Helper Tasks + //--------------------------------------------------------------------------- + + // Translate the desired register access to a ctrlport write request. + // addr is declared unsigned so that settings addresses >= 128 (e.g. SR_N_ADDR) + // are not sign-extended into a negative ctrlport address. + task automatic write_reg(int port, byte unsigned addr, bit [31:0] value); + blk_ctrl.reg_write(256*8*port + addr*8, value); + endtask : write_reg + + + // Translate the desired register access to a ctrlport read request. + // The 64-bit value is fetched as two 32-bit reads (low word, then high word). + task automatic read_user_reg(int port, byte unsigned addr, output logic [63:0] value); + blk_ctrl.reg_read(256*8*port + addr*8 + 0, value[31: 0]); + blk_ctrl.reg_read(256*8*port + addr*8 + 4, value[63:32]); + endtask : read_user_reg + + + task automatic set_decim_rate(int port, input int decim_rate); + logic [7:0] cic_rate; + logic [1:0] hb_enables; + int _decim_rate; + + cic_rate = 8'd0; + hb_enables = 2'b0; + _decim_rate = decim_rate; + + // Calculate which half bands to enable and whatever is left over set the CIC + while ((_decim_rate[0] == 0) && (hb_enables < NUM_HB)) begin + hb_enables += 1'b1; + _decim_rate = _decim_rate >> 1; + end + // CIC rate cannot be set to 0 + cic_rate = (_decim_rate[7:0] == 8'd0) ? 8'd1 : _decim_rate[7:0]; + `ASSERT_ERROR( + hb_enables <= NUM_HB, + "Enabled halfbands may not exceed total number of half bands." 
+ ); + `ASSERT_ERROR( + cic_rate > 0 && cic_rate <= CIC_MAX_DECIM, + "CIC Decimation rate must be positive, not exceed the max cic decimation rate, and cannot equal 0!" + ); + + // Setup DDC + $display("Set decimation to %0d", decim_rate); + $display("- Number of enabled HBs: %0d", hb_enables); + $display("- CIC Rate: %0d", cic_rate); + write_reg(port, SR_N_ADDR, decim_rate); // Set decimation rate in AXI rate change + write_reg(port, SR_DECIM_ADDR, {hb_enables,cic_rate}); // Enable HBs, set CIC rate + endtask + + + task automatic send_ramp ( + input int unsigned port, + input int unsigned decim_rate, + // (Optional) For testing passing through partial packets + input logic drop_partial_packet = 1'b0, + input int unsigned extra_samples = 0 + ); + set_decim_rate(port, decim_rate); + + // Setup DDC + write_reg(port, SR_CONFIG_ADDR, 32'd1); // Enable clear EOB + write_reg(port, SR_FREQ_ADDR, 32'd0); // Phase increment + write_reg(port, SR_SCALE_IQ_ADDR, (1 << 14)); // Scaling, set to 1 + + // Send a short ramp, should pass through unchanged + fork + begin + chdr_word_t send_payload[$]; + packet_info_t pkt_info; + + pkt_info = 0; + for (int i = 0; i < decim_rate*(PKT_SIZE_BYTES/8 + extra_samples); i++) begin + send_payload.push_back({16'(2*i/decim_rate), 16'(2*i/decim_rate), 16'((2*i+1)/decim_rate), 16'((2*i+1)/decim_rate)}); + end + $display("Send ramp (%0d words)", send_payload.size()); + pkt_info.eob = 1; + blk_ctrl.send_packets(port, send_payload, /*data_bytes*/, /*metadata*/, pkt_info); + blk_ctrl.wait_complete(port); + $display("Send ramp complete"); + end + begin + string s; + logic [63:0] samples, samples_old; + chdr_word_t recv_payload[$], temp_payload[$]; + chdr_word_t metadata[$]; + int data_bytes; + packet_info_t pkt_info; + + $display("Check ramp"); + if (~drop_partial_packet && (extra_samples > 0)) begin + blk_ctrl.recv_adv(port, temp_payload, data_bytes, metadata, pkt_info); + $sformat(s, "Invalid EOB state! 
Expected %b, Received: %b", 1'b0, pkt_info.eob); + `ASSERT_ERROR(pkt_info.eob == 1'b0, s); + end + $display("Receiving packet"); + blk_ctrl.recv_adv(port, recv_payload, data_bytes, metadata, pkt_info); + $display("Received!"); + $sformat(s, "Invalid EOB state! Expected %b, Received: %b", 1'b1, pkt_info.eob); + `ASSERT_ERROR(pkt_info.eob == 1'b1, s); + recv_payload = {temp_payload, recv_payload}; + if (drop_partial_packet) begin + $sformat(s, "Incorrect packet size! Expected: %0d, Actual: %0d", PKT_SIZE_BYTES/8, recv_payload.size()); + `ASSERT_ERROR(recv_payload.size() == PKT_SIZE_BYTES/8, s); + end else begin + // The expected size includes the extra samples; report it the same way it is checked + $sformat(s, "Incorrect packet size! Expected: %0d, Actual: %0d", PKT_SIZE_BYTES/8 + extra_samples, recv_payload.size()); + `ASSERT_ERROR(recv_payload.size() == PKT_SIZE_BYTES/8 + extra_samples, s); + end + samples = 64'd0; + samples_old = 64'd0; + for (int i = 0; i < PKT_SIZE_BYTES/8; i++) begin + samples = recv_payload[i]; + for (int j = 0; j < 4; j++) begin + // Need to check a range of values due to imperfect gain compensation + $sformat(s, "Ramp word %0d invalid! 
Expected: %0d-%0d, Received: %0d", 2*i, + samples_old[16*j +: 16], samples_old[16*j +: 16]+16'd4, samples[16*j +: 16]); + // Compare lane by lane: each 16-bit value must lie in [previous, previous+4] + `ASSERT_ERROR((samples_old[16*j +: 16]+16'd4 >= samples[16*j +: 16]) && (samples[16*j +: 16] >= samples_old[16*j +: 16]), s); + end + samples_old = samples; + end + $display("Check complete"); + end + join + endtask + + + //--------------------------------------------------------------------------- + // Test Process + //--------------------------------------------------------------------------- + + initial begin : tb_main + const int port = 0; + test.start_tb("rfnoc_block_ddc_tb"); + + // Start the BFMs running + blk_ctrl.run(); + + + //------------------------------------------------------------------------- + // Reset + //------------------------------------------------------------------------- + + test.start_test("Wait for Reset", 10us); + fork + blk_ctrl.reset_chdr(); + blk_ctrl.reset_ctrl(); + join; + test.end_test(); + + + //------------------------------------------------------------------------- + // Check NoC ID and Block Info + //------------------------------------------------------------------------- + + test.start_test("Verify Block Info", 2us); + `ASSERT_ERROR(blk_ctrl.get_noc_id() == rfnoc_block_ddc_i.NOC_ID, "Incorrect NOC_ID Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect NUM_DATA_I Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect NUM_DATA_O Value"); + `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect MTU Value"); + test.end_test(); + + + //------------------------------------------------------------------------- + // Test read-back regs + //------------------------------------------------------------------------- + + begin + logic [63:0] val64; + test.start_test("Test registers", 10us); + read_user_reg(port, RB_NUM_HB, val64); + `ASSERT_ERROR(val64 == NUM_HB, "Register NUM_HB didn't read back expected value"); + read_user_reg(port, RB_CIC_MAX_DECIM, val64); + `ASSERT_ERROR(val64 == 
CIC_MAX_DECIM, "Register CIC_MAX_DECIM didn't read back expected value"); + test.end_test(); + end + + + //------------------------------------------------------------------------- + // Test various decimation rates + //------------------------------------------------------------------------- + + begin + test.start_test("Decimate by 1, 2, 3, 4, 6, 8, 12, 13, 16, 24, 40, 255, 2040", 0.5ms); + + $display("Note: This test will take a long time!"); + + // List of rates to catch most issues + send_ramp(port, 1); // HBs enabled: 0, CIC rate: 1 + send_ramp(port, 2); // HBs enabled: 1, CIC rate: 1 + send_ramp(port, 3); // HBs enabled: 0, CIC rate: 3 + send_ramp(port, 4); // HBs enabled: 2, CIC rate: 1 + if (EXTENDED_TEST) send_ramp(port, 6); // HBs enabled: 1, CIC rate: 3 + send_ramp(port, 8); // HBs enabled: 3, CIC rate: 1 + send_ramp(port, 12); // HBs enabled: 2, CIC rate: 3 + send_ramp(port, 13); // HBs enabled: 0, CIC rate: 13 + if (EXTENDED_TEST) send_ramp(port, 16); // HBs enabled: 3, CIC rate: 2 + if (EXTENDED_TEST) send_ramp(port, 24); // HBs enabled: 3, CIC rate: 3 + send_ramp(port, 40); // HBs enabled: 3, CIC rate: 5 + if (EXTENDED_TEST) send_ramp(port, 200); // HBs enabled: 3, CIC rate: 25 + send_ramp(port, 255); // HBs enabled: 0, CIC rate: 255 + if (EXTENDED_TEST) send_ramp(port, 2040); // HBs enabled: 3, CIC rate: 255 + + test.end_test(); + end + + + //------------------------------------------------------------------------- + // Test timed tune + //------------------------------------------------------------------------- + + // This test has not been implemented because the RFNoC FFT has not been + // ported yet. 
+ + + //------------------------------------------------------------------------- + // Test passing through a partial packet + //------------------------------------------------------------------------- + + test.start_test("Pass through partial packet"); + send_ramp(port, 2, 0, 4); + send_ramp(port, 3, 0, 4); + send_ramp(port, 4, 0, 4); + if (EXTENDED_TEST) send_ramp(port, 8, 0, 4); + send_ramp(port, 13, 0, 4); + if (EXTENDED_TEST) send_ramp(port, 24, 0, 4); + test.end_test(); + + + //------------------------------------------------------------------------- + // Finish + //------------------------------------------------------------------------- + + // End the TB, but don't $finish, since we don't want to kill other + // instances of this testbench that may be running. + test.end_tb(0); + + // Kill the clocks to end this instance of the testbench + rfnoc_chdr_clk_gen.kill(); + rfnoc_ctrl_clk_gen.kill(); + ddc_clk_gen.kill(); + end +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile new file mode 100644 index 000000000..6d1da3d60 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile @@ -0,0 +1,67 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# IP Specific +#------------------------------------------------- +# If simulation contains IP, define the IP_DIR and point +# it to the base level IP directory +LIB_IP_DIR = $(BASE_DIR)/../lib/ip + +# Include makefiles and sources for all IP components +# *after* defining the LIB_IP_DIR 
+include $(LIB_IP_DIR)/axi_hb47/Makefile.inc +include $(LIB_IP_DIR)/complex_multiplier_dds/Makefile.inc +include $(LIB_IP_DIR)/dds_sin_cos_lut_only/Makefile.inc +include $(BASE_DIR)/x300/coregen_dsp/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(LIB_IP_AXI_HB47_SRCS) \ +$(LIB_IP_COMPLEX_MULTIPLIER_DDS_SRCS) \ +$(LIB_IP_DDS_SIN_COS_LUT_ONLY_SRCS) \ +$(COREGEN_DSP_SRCS) \ +) + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_BLOCK_DUC_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = rfnoc_block_duc_tb + +# Add test bench, user design under test, and +# additional user created files +SIM_SRCS = \ +$(abspath rfnoc_block_duc_tb.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile.srcs new file mode 100644 index 000000000..69b6eaece --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/Makefile.srcs @@ -0,0 +1,11 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +RFNOC_BLOCK_DUC_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_duc/, \ +../rfnoc_block_ddc/noc_shell_ddc.v \ +rfnoc_block_duc_regs.vh \ +rfnoc_block_duc.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc.v new file mode 100644 index 000000000..400e9d270 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc.v @@ -0,0 +1,387 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_duc +// +// Description: A digital up-converter block for RFNoC. +// +// Parameters: +// +// THIS_PORTID : Control crossbar port to which this block is connected +// CHDR_W : AXIS CHDR interface data width +// NUM_PORTS : Number of DUC signal processing chains +// MTU : Maximum transmission unit (i.e., maximum packet size) in +// CHDR words is 2**MTU. +// CTRL_FIFO_SIZE : Size of the Control Port slave FIFO. This affects the +// number of outstanding commands that can be pending. 
+// NUM_HB : Number of half-band filter blocks to include (0-3) +// CIC_MAX_INTERP : Maximum interpolation to support in the CIC filter +// + +module rfnoc_block_duc #( + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter NUM_PORTS = 2, + parameter MTU = 10, + parameter CTRL_FIFO_SIZE = 6, + parameter NUM_HB = 2, + parameter CIC_MAX_INTERP = 128 +) ( + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + input wire ce_clk, + + // CHDR inputs from framework + input wire [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready +); + + // These are the only supported values for now + localparam ITEM_W = 32; + localparam NIPC = 1; + + localparam NOC_ID = 'hD0C0_0000; + + localparam COMPAT_MAJOR = 16'h0; + localparam COMPAT_MINOR = 16'h0; + + `include 
"rfnoc_block_duc_regs.vh" + `include "../../core/rfnoc_axis_ctrl_utils.vh" + + + //--------------------------------------------------------------------------- + // Signal Declarations + //--------------------------------------------------------------------------- + + wire rfnoc_chdr_rst; + + wire ctrlport_req_wr; + wire ctrlport_req_rd; + wire [19:0] ctrlport_req_addr; + wire [31:0] ctrlport_req_data; + wire ctrlport_req_has_time; + wire [63:0] ctrlport_req_time; + wire ctrlport_resp_ack; + wire [31:0] ctrlport_resp_data; + + wire [NUM_PORTS*ITEM_W-1:0] m_axis_data_tdata; + wire [ NUM_PORTS-1:0] m_axis_data_tlast; + wire [ NUM_PORTS-1:0] m_axis_data_tvalid; + wire [ NUM_PORTS-1:0] m_axis_data_tready; + wire [ NUM_PORTS*64-1:0] m_axis_data_ttimestamp; + wire [ NUM_PORTS-1:0] m_axis_data_thas_time; + wire [ NUM_PORTS*16-1:0] m_axis_data_tlength; + wire [ NUM_PORTS-1:0] m_axis_data_teob; + wire [ NUM_PORTS*128-1:0] m_axis_data_tuser; + + wire [NUM_PORTS*ITEM_W-1:0] s_axis_data_tdata; + wire [ NUM_PORTS-1:0] s_axis_data_tlast; + wire [ NUM_PORTS-1:0] s_axis_data_tvalid; + wire [ NUM_PORTS-1:0] s_axis_data_tready; + wire [ NUM_PORTS*128-1:0] s_axis_data_tuser; + wire [ NUM_PORTS-1:0] s_axis_data_teob; + wire [ NUM_PORTS*64-1:0] s_axis_data_ttimestamp; + wire [ NUM_PORTS-1:0] s_axis_data_thas_time; + + wire duc_rst; + + // Cross the CHDR reset to the ce_clk domain + synchronizer duc_rst_sync_i ( + .clk (ce_clk), + .rst (1'b0), + .in (rfnoc_chdr_rst), + .out (duc_rst) + ); + + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + // TODO: Replace noc_shell_radio with a customized block + noc_shell_ddc #( + .NOC_ID (NOC_ID), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .CTRLPORT_SLV_EN (0), + .CTRLPORT_MST_EN (1), + .CTRL_FIFO_SIZE (CTRL_FIFO_SIZE), + .NUM_DATA_I (NUM_PORTS), + .NUM_DATA_O (NUM_PORTS), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + 
.PYLD_FIFO_SIZE (MTU), + .MTU (MTU) + ) noc_shell_ddc_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_ctrl_rst (), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready), + .ctrlport_clk (ce_clk), + .ctrlport_rst (duc_rst), + .m_ctrlport_req_wr (ctrlport_req_wr), + .m_ctrlport_req_rd (ctrlport_req_rd), + .m_ctrlport_req_addr (ctrlport_req_addr), + .m_ctrlport_req_data (ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (ctrlport_req_has_time), + .m_ctrlport_req_time (ctrlport_req_time), + .m_ctrlport_resp_ack (ctrlport_resp_ack), + .m_ctrlport_resp_status (AXIS_CTRL_STS_OKAY), + .m_ctrlport_resp_data (ctrlport_resp_data), + .s_ctrlport_req_wr (1'b0), + .s_ctrlport_req_rd (1'b0), + .s_ctrlport_req_addr (20'b0), + .s_ctrlport_req_portid (10'b0), + .s_ctrlport_req_rem_epid (16'b0), + .s_ctrlport_req_rem_portid (10'b0), + .s_ctrlport_req_data (32'b0), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .axis_data_clk (ce_clk), + .axis_data_rst (duc_rst), + .m_axis_tdata (m_axis_data_tdata), + 
.m_axis_tkeep (), + .m_axis_tlast (m_axis_data_tlast), + .m_axis_tvalid (m_axis_data_tvalid), + .m_axis_tready (m_axis_data_tready), + .m_axis_ttimestamp (m_axis_data_ttimestamp), + .m_axis_thas_time (m_axis_data_thas_time), + .m_axis_tlength (m_axis_data_tlength), + .m_axis_teov (), + .m_axis_teob (m_axis_data_teob), + .s_axis_tdata (s_axis_data_tdata), + .s_axis_tkeep ({NUM_PORTS*NIPC{1'b1}}), + .s_axis_tlast (s_axis_data_tlast), + .s_axis_tvalid (s_axis_data_tvalid), + .s_axis_tready (s_axis_data_tready), + .s_axis_ttimestamp (s_axis_data_ttimestamp), + .s_axis_thas_time (s_axis_data_thas_time), + .s_axis_teov ({NUM_PORTS{1'b0}}), + .s_axis_teob (s_axis_data_teob) + ); + + + //--------------------------------------------------------------------------- + // Register Translation + //--------------------------------------------------------------------------- + // + // Each DUC block is allocated an address spaces. This block translates CTRL + // port transactions in that space to settings bus. 
+ // + //--------------------------------------------------------------------------- + + wire [ 8*NUM_PORTS-1:0] set_addr; + wire [32*NUM_PORTS-1:0] set_data; + wire [ NUM_PORTS-1:0] set_has_time; + wire [ NUM_PORTS-1:0] set_stb; + wire [64*NUM_PORTS-1:0] set_time; + wire [ 8*NUM_PORTS-1:0] rb_addr; + reg [64*NUM_PORTS-1:0] rb_data; + + ctrlport_to_settings_bus # ( + .NUM_PORTS (NUM_PORTS) + ) ctrlport_to_settings_bus_i ( + .ctrlport_clk (ce_clk), + .ctrlport_rst (duc_rst), + .s_ctrlport_req_wr (ctrlport_req_wr), + .s_ctrlport_req_rd (ctrlport_req_rd), + .s_ctrlport_req_addr (ctrlport_req_addr), + .s_ctrlport_req_data (ctrlport_req_data), + .s_ctrlport_req_has_time (ctrlport_req_has_time), + .s_ctrlport_req_time (ctrlport_req_time), + .s_ctrlport_resp_ack (ctrlport_resp_ack), + .s_ctrlport_resp_data (ctrlport_resp_data), + .set_data (set_data), + .set_addr (set_addr), + .set_stb (set_stb), + .set_time (set_time), + .set_has_time (set_has_time), + .rb_stb ({NUM_PORTS{1'b1}}), + .rb_addr (rb_addr), + .rb_data (rb_data)); + + + //--------------------------------------------------------------------------- + // DUC Implementation + //--------------------------------------------------------------------------- + + // Unused signals + wire [ NUM_PORTS-1:0] clear_tx_seqnum = 0; + wire [16*NUM_PORTS-1:0] src_sid = 0; + wire [16*NUM_PORTS-1:0] next_dst_sid = 0; + + localparam MAX_M = CIC_MAX_INTERP * 2<<(NUM_HB-1); + + genvar i; + generate + for (i = 0; i < NUM_PORTS; i = i + 1) begin : gen_duc_chains + wire clear_user; + wire clear_duc = clear_tx_seqnum[i] | clear_user; + + wire set_stb_int = set_stb[i]; + wire [7:0] set_addr_int = set_addr[8*i+7:8*i]; + wire [31:0] set_data_int = set_data[32*i+31:32*i]; + wire [63:0] set_time_int = set_time[64*i+63:64*i]; + wire set_has_time_int = set_has_time[i]; + + // Build the expected tuser CHDR header + cvita_hdr_encoder cvita_hdr_encoder_i ( + .pkt_type (2'b0), + .eob (m_axis_data_teob[i]), + .has_time (m_axis_data_thas_time[i]), + 
.seqnum (12'b0), + .payload_length (m_axis_data_tlength[16*i +: 16]), + .src_sid (16'b0), + .dst_sid (16'b0), + .vita_time (m_axis_data_ttimestamp[64*i +: 64]), + .header (m_axis_data_tuser[128*i+:128]) + ); + + // Extract bit fields from outgoing tuser CHDR header + assign s_axis_data_teob[i] = s_axis_data_tuser[128*i+124 +: 1]; + assign s_axis_data_thas_time[i] = s_axis_data_tuser[128*i+125 +: 1]; + assign s_axis_data_ttimestamp[64*i+:64] = s_axis_data_tuser[128*i+ 0 +: 64]; + + // TODO Readback register for number of FIR filter taps + always @(*) begin // Combinational readback mux for port i, so use blocking assignments + case(rb_addr[i*8+7:i*8]) + RB_COMPAT_NUM : rb_data[i*64+63:i*64] = {COMPAT_MAJOR, COMPAT_MINOR}; + RB_NUM_HB : rb_data[i*64+63:i*64] = NUM_HB; + RB_CIC_MAX_INTERP : rb_data[i*64+63:i*64] = CIC_MAX_INTERP; + default : rb_data[i*64+63:i*64] = 64'h0BADC0DE0BADC0DE; + endcase + end + + //////////////////////////////////////////////////////////// + // + // Timed CORDIC + // - Implements timed cordic tunes. Placed between AXI Wrapper + // and AXI Rate Change due to it needing access to the + // vita time of the samples. 
+ // + //////////////////////////////////////////////////////////// + wire [31:0] m_axis_rc_tdata; + wire m_axis_rc_tlast; + wire m_axis_rc_tvalid; + wire m_axis_rc_tready; + wire [127:0] m_axis_rc_tuser; + + dds_timed #( + .SR_FREQ_ADDR(SR_FREQ_ADDR), + .SR_SCALE_IQ_ADDR(SR_SCALE_IQ_ADDR)) + dds_timed ( + .clk(ce_clk), .reset(duc_rst), .clear(clear_tx_seqnum[i]), + .timed_cmd_fifo_full(), + .set_stb(set_stb_int), .set_addr(set_addr_int), .set_data(set_data_int), + .set_time(set_time_int), .set_has_time(set_has_time_int), + .i_tdata(m_axis_rc_tdata), .i_tlast(m_axis_rc_tlast), .i_tvalid(m_axis_rc_tvalid), + .i_tready(m_axis_rc_tready), .i_tuser(m_axis_rc_tuser), + .o_tdata(s_axis_data_tdata[ITEM_W*i+:ITEM_W]), .o_tlast(s_axis_data_tlast[i]), .o_tvalid(s_axis_data_tvalid[i]), + .o_tready(s_axis_data_tready[i]), .o_tuser(s_axis_data_tuser[128*i+:128])); + + //////////////////////////////////////////////////////////// + // + // Increase Rate + // + //////////////////////////////////////////////////////////// + wire [31:0] sample_tdata, sample_duc_tdata; + wire sample_tvalid, sample_tready; + wire sample_duc_tvalid, sample_duc_tready; + axi_rate_change #( + .WIDTH(32), + .MAX_N(1), + .MAX_M(MAX_M), + .SR_N_ADDR(SR_N_ADDR), + .SR_M_ADDR(SR_M_ADDR), + .SR_CONFIG_ADDR(SR_CONFIG_ADDR)) + axi_rate_change ( + .clk(ce_clk), .reset(duc_rst), .clear(clear_tx_seqnum[i]), .clear_user(clear_user), + .src_sid(src_sid[16*i+15:16*i]), .dst_sid(next_dst_sid[16*i+15:16*i]), + .set_stb(set_stb_int), .set_addr(set_addr_int), .set_data(set_data_int), + .i_tdata(m_axis_data_tdata[ITEM_W*i+:ITEM_W]), .i_tlast(m_axis_data_tlast[i]), .i_tvalid(m_axis_data_tvalid[i]), + .i_tready(m_axis_data_tready[i]), .i_tuser(m_axis_data_tuser[128*i+:128]), + .o_tdata(m_axis_rc_tdata), .o_tlast(m_axis_rc_tlast), .o_tvalid(m_axis_rc_tvalid), + .o_tready(m_axis_rc_tready), .o_tuser(m_axis_rc_tuser), + .m_axis_data_tdata({sample_tdata}), .m_axis_data_tlast(), .m_axis_data_tvalid(sample_tvalid), + 
.m_axis_data_tready(sample_tready), + .s_axis_data_tdata(sample_duc_tdata), .s_axis_data_tlast(1'b0), .s_axis_data_tvalid(sample_duc_tvalid), + .s_axis_data_tready(sample_duc_tready), + .warning_long_throttle(), .error_extra_outputs(), .error_drop_pkt_lockup()); + + //////////////////////////////////////////////////////////// + // + // Digital Up Converter + // + //////////////////////////////////////////////////////////// + duc #( + .SR_INTERP_ADDR(SR_INTERP_ADDR), + .NUM_HB(NUM_HB), + .CIC_MAX_INTERP(CIC_MAX_INTERP)) + duc ( + .clk(ce_clk), .reset(duc_rst), .clear(clear_duc), + .set_stb(set_stb_int), .set_addr(set_addr_int), .set_data(set_data_int), + .i_tdata(sample_tdata), .i_tuser(128'b0), .i_tvalid(sample_tvalid), .i_tready(sample_tready), + .o_tdata(sample_duc_tdata), .o_tuser(), .o_tvalid(sample_duc_tvalid), .o_tready(sample_duc_tready)); + + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_regs.vh b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_regs.vh new file mode 100644 index 000000000..fa239857e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_regs.vh @@ -0,0 +1,25 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_duc_regs (Header) +// +// Description: Header file for RFNoC DUC functionality. This includes +// register offsets, bitfields and constants for the DUC block. 
+// + +// For now, these offsets match the original DUC +localparam DUC_BASE_ADDR = 'h00; +localparam DUC_ADDR_W = 8; + +localparam RB_COMPAT_NUM = 0; +localparam RB_NUM_HB = 1; +localparam RB_CIC_MAX_INTERP = 2; +localparam SR_N_ADDR = 128; +localparam SR_M_ADDR = 129; +localparam SR_CONFIG_ADDR = 130; +localparam SR_INTERP_ADDR = 131; +localparam SR_FREQ_ADDR = 132; +localparam SR_SCALE_IQ_ADDR = 133; + diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_tb.sv new file mode 100644 index 000000000..5bca3f03b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_duc/rfnoc_block_duc_tb.sv @@ -0,0 +1,387 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_duc_tb +// +// Description: Testbench for rfnoc_block_duc +// + + +module rfnoc_block_duc_tb(); + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + + `include "rfnoc_block_duc_regs.vh" + + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Simulation parameters + localparam real CHDR_CLK_PER = 5.0; // CHDR clock rate + localparam real DUC_CLK_PER = 4.0; // DUC IP clock rate + localparam int EXTENDED_TEST = 0; // Perform a longer test + localparam int SPP = 128; // Samples per packet + localparam int PKT_SIZE_BYTES = SPP*4; // Bytes per packet + localparam int STALL_PROB = 25; // BFM stall probability + + // Block configuration + localparam int CHDR_W = 64; + localparam int THIS_PORTID = 'h123; + localparam int MTU = 8; + localparam int NUM_PORTS = 1; + localparam int NUM_HB = 3; + localparam int CIC_MAX_INTERP = 128; + + + 
//--------------------------------------------------------------------------- + // Clocks + //--------------------------------------------------------------------------- + + bit rfnoc_chdr_clk; + bit rfnoc_ctrl_clk; + + sim_clock_gen #(CHDR_CLK_PER) rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst()); + sim_clock_gen #(CHDR_CLK_PER) rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst()); + sim_clock_gen #(DUC_CLK_PER) duc_clk_gen (.clk(ce_clk), .rst()); + + + //--------------------------------------------------------------------------- + // Bus Functional Models + //--------------------------------------------------------------------------- + + RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_ctrl_clk); + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(CHDR_W) m_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + AxiStreamIf #(CHDR_W) s_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl = + new(backend, m_ctrl, s_ctrl); + + // Connect block controller to BFMs + for (genvar i = 0; i < NUM_PORTS; i++) begin : gen_bfm_connections + initial begin + blk_ctrl.connect_master_data_port(i, m_chdr[i], PKT_SIZE_BYTES); + blk_ctrl.connect_slave_data_port(i, s_chdr[i]); + blk_ctrl.set_master_stall_prob(i, STALL_PROB); + blk_ctrl.set_slave_stall_prob(i, STALL_PROB); + end + end + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tready; + + logic [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tready; + + // Map the 
array of BFMs to a flat vector for the DUT + genvar i; + for (i = 0; i < NUM_PORTS; i++) begin : gen_dut_connections + // Connect BFM master to DUT slave port + assign s_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W] = m_chdr[i].tdata; + assign s_rfnoc_chdr_tlast[i] = m_chdr[i].tlast; + assign s_rfnoc_chdr_tvalid[i] = m_chdr[i].tvalid; + assign m_chdr[i].tready = s_rfnoc_chdr_tready[i]; + + // Connect BFM slave to DUT master port + assign s_chdr[i].tdata = m_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W]; + assign s_chdr[i].tlast = m_rfnoc_chdr_tlast[i]; + assign s_chdr[i].tvalid = m_rfnoc_chdr_tvalid[i]; + assign m_rfnoc_chdr_tready[i] = s_chdr[i].tready; + end + + rfnoc_block_duc #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NUM_PORTS (NUM_PORTS), + .MTU (MTU), + .NUM_HB (NUM_HB), + .CIC_MAX_INTERP (CIC_MAX_INTERP) + ) rfnoc_block_duc_i ( + .rfnoc_chdr_clk (backend.chdr_clk), + .ce_clk (ce_clk), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .rfnoc_core_config (backend.cfg), + .rfnoc_core_status (backend.sts), + .rfnoc_ctrl_clk (backend.ctrl_clk), + .s_rfnoc_ctrl_tdata (m_ctrl.tdata), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast), + .s_rfnoc_ctrl_tvalid (m_ctrl.tvalid), + .s_rfnoc_ctrl_tready (m_ctrl.tready), + .m_rfnoc_ctrl_tdata (s_ctrl.tdata), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast), + .m_rfnoc_ctrl_tvalid (s_ctrl.tvalid), + .m_rfnoc_ctrl_tready (s_ctrl.tready) + ); + + + //--------------------------------------------------------------------------- + // Helper Tasks + //--------------------------------------------------------------------------- + + // Translate the desired register access to a ctrlport write request. 
+ task automatic write_reg(int port, byte unsigned addr, bit [31:0] value); + blk_ctrl.reg_write(256*8*port + addr*8, value); + endtask : write_reg + + + // Translate the desired register access to a ctrlport read request. + task automatic read_user_reg(int port, byte unsigned addr, output logic [63:0] value); + blk_ctrl.reg_read(256*8*port + addr*8 + 0, value[31: 0]); + blk_ctrl.reg_read(256*8*port + addr*8 + 4, value[63:32]); + endtask : read_user_reg + + + // Set the interpolation rate + task automatic set_interp_rate(int port, int interp_rate); + begin + logic [7:0] cic_rate = 8'd0; + logic [7:0] hb_enables = 2'b0; + + int _interp_rate = interp_rate; + + // Calculate which half bands to enable and whatever is left over set the CIC + while ((_interp_rate[0] == 0) && (hb_enables < NUM_HB)) begin + hb_enables += 1'b1; + _interp_rate = _interp_rate >> 1; + end + + // CIC rate cannot be set to 0 + cic_rate = (_interp_rate[7:0] == 8'd0) ? 8'd1 : _interp_rate[7:0]; + `ASSERT_ERROR(hb_enables <= NUM_HB, "Enabled halfbands may not exceed total number of half bands."); + `ASSERT_ERROR(cic_rate > 0 && cic_rate <= CIC_MAX_INTERP, + "CIC Interpolation rate must be positive, not exceed the max cic interpolation rate, and cannot equal 0!"); + + // Setup DUC + $display("Set interpolation to %0d", interp_rate); + $display("- Number of enabled HBs: %0d", hb_enables); + $display("- CIC Rate: %0d", cic_rate); + write_reg(port, SR_M_ADDR, interp_rate); // Set interpolation rate in AXI rate change + write_reg(port, SR_INTERP_ADDR, {hb_enables, cic_rate}); // Enable HBs, set CIC rate + end + endtask + + + // Test sending packets of ones + task automatic send_ones(int port, int interp_rate, bit has_time); + begin + const bit [63:0] start_time = 64'h0123456789ABCDEF; + + set_interp_rate(port, interp_rate); + + // Setup DUC + write_reg(port, SR_CONFIG_ADDR, 32'd1); // Enable clear EOB + write_reg(port, SR_FREQ_ADDR, 32'd0); // CORDIC phase increment + write_reg(port, SR_SCALE_IQ_ADDR, 
(1 << 14)); // Scaling, set to 1 + + fork + begin + chdr_word_t send_payload[$]; + packet_info_t pkt_info; + + $display("Send ones"); + + // Generate a payload of all ones + send_payload = {}; + for (int i = 0; i < PKT_SIZE_BYTES/8; i++) begin + send_payload.push_back({16'hffff, 16'hffff, 16'hffff, 16'hffff}); + end + + // Send two packets with EOB on the second packet + pkt_info = 0; + pkt_info.has_time = has_time; + pkt_info.timestamp = start_time; + blk_ctrl.send_packets(port, send_payload, /*data_bytes*/, /*metadata*/, pkt_info); + pkt_info.timestamp = start_time + SPP; + pkt_info.eob = 1; + blk_ctrl.send_packets(port, send_payload, /*data_bytes*/, /*metadata*/, pkt_info); + + $display("Send ones complete"); + end + begin + string s; + chdr_word_t samples; + int data_bytes; + chdr_word_t recv_payload[$]; + chdr_word_t metadata[$]; + packet_info_t pkt_info; + + $display("Check incoming samples"); + for (int i = 0; i < 2*interp_rate; i++) begin + blk_ctrl.recv_adv(port, recv_payload, data_bytes, metadata, pkt_info); + + // Check the packet size + $sformat(s, "incorrect (drop) packet size! 
expected: %0d, actual: %0d", PKT_SIZE_BYTES/8, recv_payload.size()); + `ASSERT_ERROR(recv_payload.size() == PKT_SIZE_BYTES/8, s); + + // Check the timestamp + if (has_time) begin + bit [63:0] expected_time; + // Calculate what the timestamp should be + expected_time = start_time + i * SPP; + $sformat(s, "Incorrect timestamp: has_time = %0d, timestamp = 0x%0X, expected 0x%0X", + pkt_info.has_time, pkt_info.timestamp, expected_time); + `ASSERT_ERROR(pkt_info.has_time == 1 && pkt_info.timestamp == expected_time, s); + end else begin + `ASSERT_ERROR(pkt_info.has_time == 0, "Packet has timestamp when it shouldn't"); + end + + // Check EOB + if (i == 2*interp_rate-1) begin + `ASSERT_ERROR(pkt_info.eob == 1, "EOB not set on last packet"); + end else begin + `ASSERT_ERROR(pkt_info.eob == 0, + $sformatf("EOB unexpectedly set on packet %0d", i)); + end + + // Check the sample values + samples = 64'd0; + for (int j = 0; j < PKT_SIZE_BYTES/8; j++) begin + samples = recv_payload[j]; + $sformat(s, "Ramp word %0d invalid! 
Expected a real value, Received: %0d", 2*j, samples); + `ASSERT_ERROR(samples >= 0, s); + end + end + $display("Check complete"); + end + join + end + endtask + + + //--------------------------------------------------------------------------- + // Test Process + //--------------------------------------------------------------------------- + + initial begin : tb_main + const int port = 0; + test.start_tb("rfnoc_block_duc_tb"); + + // Start the BFMs running + blk_ctrl.run(); + + + //------------------------------------------------------------------------- + // Reset + //------------------------------------------------------------------------- + + test.start_test("Wait for Reset", 10us); + fork + blk_ctrl.reset_chdr(); + blk_ctrl.reset_ctrl(); + join; + test.end_test(); + + + //------------------------------------------------------------------------- + // Check NoC ID and Block Info + //------------------------------------------------------------------------- + + test.start_test("Verify Block Info", 2us); + `ASSERT_ERROR(blk_ctrl.get_noc_id() == rfnoc_block_duc_i.NOC_ID, "Incorrect NOC_ID value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect NUM_DATA_I value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect NUM_DATA_O value"); + `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect MTU value"); + test.end_test(); + + + //------------------------------------------------------------------------- + // Test read-back regs + //------------------------------------------------------------------------- + + begin + logic [63:0] val64; + test.start_test("Test registers", 10us); + read_user_reg(port, RB_NUM_HB, val64); + `ASSERT_ERROR(val64 == NUM_HB, "Register NUM_HB didn't read back expected value"); + read_user_reg(port, RB_CIC_MAX_INTERP, val64); + `ASSERT_ERROR(val64 ==CIC_MAX_INTERP, "Register RB_CIC_MAX_INTERP didn't read back expected value"); + test.end_test(); + end + + + 
//------------------------------------------------------------------------- + // Test various interpolation rates (with timestamp) + //------------------------------------------------------------------------- + + begin + test.start_test("Test interpolation rates (with timestamp)", 0.5ms); + + $display("Note: This test will take a long time!"); + send_ones(port, 1, 1); // HBs enabled: 0, CIC rate: 1 + send_ones(port, 2, 1); // HBs enabled: 1, CIC rate: 1 + send_ones(port, 3, 1); // HBs enabled: 0, CIC rate: 3 + send_ones(port, 4, 1); // HBs enabled: 2, CIC rate: 1 + send_ones(port, 6, 1); // HBs enabled: 1, CIC rate: 3 + send_ones(port, 8, 1); // HBs enabled: 3, CIC rate: 1 + send_ones(port, 12, 1); // HBs enabled: 2, CIC rate: 3 + send_ones(port, 13, 1); // HBs enabled: 0, CIC rate: 13 + send_ones(port, 16, 1); // HBs enabled: 3, CIC rate: 2 + send_ones(port, 40, 1); // HBs enabled: 3, CIC rate: 5 + + test.end_test(); + end + + + //------------------------------------------------------------------------- + // Test various interpolation rates (without timestamp) + //------------------------------------------------------------------------- + + begin + test.start_test("Test interpolation rates (no timestamp)", 0.5ms); + + send_ones(port, 1, 0); // HBs enabled: 0, CIC rate: 1 + send_ones(port, 3, 0); // HBs enabled: 0, CIC rate: 3 + + test.end_test(); + end + + + //------------------------------------------------------------------------- + // Test timed tune + //------------------------------------------------------------------------- + + // This test has not been implemented because the RFNoC FFT has not been + // ported yet. + + + //------------------------------------------------------------------------- + // Finish + //------------------------------------------------------------------------- + + // End the TB, but don't $finish, since we don't want to kill other + // instances of this testbench that may be running. 
+ test.end_tb(0); + + // Kill the clocks to end this instance of the testbench + rfnoc_chdr_clk_gen.kill(); + rfnoc_ctrl_clk_gen.kill(); + duc_clk_gen.kill(); + end +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile new file mode 100644 index 000000000..868246fbd --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile @@ -0,0 +1,62 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# IP Specific +#------------------------------------------------- +# If simulation contains IP, define the IP_DIR and point +# it to the base level IP directory +LIB_IP_DIR = $(BASE_DIR)/../lib/ip + +# Include makefiles and sources for all IP components +# *after* defining the LIB_IP_DIR +include $(LIB_IP_DIR)/axi_fft/Makefile.inc +include $(LIB_IP_DIR)/complex_to_magphase/Makefile.inc + +DESIGN_SRCS += $(abspath \ +$(LIB_IP_AXI_FFT_OUTS) \ +) + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_OOT_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = rfnoc_block_fft_tb + +# Add 
test bench, user design under test, and +# additional user created files +SIM_SRCS = \ +$(abspath rfnoc_block_fft_tb.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile.srcs new file mode 100644 index 000000000..21ba967f2 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/Makefile.srcs @@ -0,0 +1,10 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +RFNOC_OOT_SRCS += $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_fft/, \ +noc_shell_fft.v \ +rfnoc_block_fft.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/noc_shell_fft.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/noc_shell_fft.v new file mode 100644 index 000000000..37a60ef31 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/noc_shell_fft.v @@ -0,0 +1,294 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: noc_shell_fft +// + +module noc_shell_fft #( + parameter [31:0] NOC_ID = 32 'h0, + parameter [ 9:0] THIS_PORTID = 10 'd0, + parameter CHDR_W = 64, + parameter [ 0:0] CTRLPORT_SLV_EN = 1, + parameter [ 0:0] CTRLPORT_MST_EN = 1, + parameter SYNC_CLKS = 0, + parameter [ 5:0] NUM_DATA_I = 1, + parameter [ 5:0] NUM_DATA_O = 1, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter PYLD_FIFO_SIZE = 5, + parameter CTXT_FIFO_SIZE = 5, + parameter MTU = 10 +) ( + //--------------------------------------------------------------------------- + // Framework Interface + 
//--------------------------------------------------------------------------- + + // RFNoC Framework Clocks and Resets + input wire rfnoc_chdr_clk, + output wire rfnoc_chdr_rst, + input wire rfnoc_ctrl_clk, + output wire rfnoc_ctrl_rst, + // RFNoC Backend Interface + input wire [ 511:0] rfnoc_core_config, + output wire [ 511:0] rfnoc_core_status, + // CHDR Input Ports (from framework) + input wire [(CHDR_W*NUM_DATA_I)-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tready, + // CHDR Output Ports (to framework) + output wire [(CHDR_W*NUM_DATA_O)-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tready, + // AXIS-Ctrl Input Port (from framework) + input wire [ 31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + // AXIS-Ctrl Output Port (to framework) + output wire [ 31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + //--------------------------------------------------------------------------- + // Client Control Port Interface + //--------------------------------------------------------------------------- + + // Clock + input wire ctrlport_clk, + input wire ctrlport_rst, + // Master + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] 
s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Client Data Interface + //--------------------------------------------------------------------------- + + // Clock + input wire axis_data_clk, + input wire axis_data_rst, + + // Output data stream (to user logic) + output wire [(NUM_DATA_I*ITEM_W*NIPC)-1:0] m_axis_payload_tdata, + output wire [ (NUM_DATA_I*NIPC)-1:0] m_axis_payload_tkeep, + output wire [ NUM_DATA_I-1:0] m_axis_payload_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_payload_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_payload_tready, + + // Input data stream (from user logic) + input wire [(NUM_DATA_O*ITEM_W*NIPC)-1:0] s_axis_payload_tdata, + input wire [ (NUM_DATA_O*NIPC)-1:0] s_axis_payload_tkeep, + input wire [ NUM_DATA_O-1:0] s_axis_payload_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_payload_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_payload_tready, + + // Output context stream (to user logic) + output wire [(NUM_DATA_I*CHDR_W)-1:0] m_axis_context_tdata, + output wire [ (4*NUM_DATA_I)-1:0] m_axis_context_tuser, + output wire [ NUM_DATA_I-1:0] m_axis_context_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_context_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_context_tready, + + // Input context stream (from user logic) + input wire [(NUM_DATA_O*CHDR_W)-1:0] s_axis_context_tdata, + input wire [ (4*NUM_DATA_O)-1:0] s_axis_context_tuser, + input wire [ NUM_DATA_O-1:0] s_axis_context_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_context_tvalid, + output wire [ NUM_DATA_O-1:0] 
s_axis_context_tready +); + + localparam CTRL_FIFO_SIZE = 5; + + + //--------------------------------------------------------------------------- + // Backend Interface + //--------------------------------------------------------------------------- + + wire data_i_flush_en; + wire [31:0] data_i_flush_timeout; + wire [63:0] data_i_flush_active; + wire [63:0] data_i_flush_done; + wire data_o_flush_en; + wire [31:0] data_o_flush_timeout; + wire [63:0] data_o_flush_active; + wire [63:0] data_o_flush_done; + + backend_iface #( + .NOC_ID (NOC_ID), + .NUM_DATA_I (NUM_DATA_I), + .NUM_DATA_O (NUM_DATA_O), + .CTRL_FIFOSIZE (CTRL_FIFO_SIZE), + .MTU (MTU) + ) backend_iface_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst), + .data_i_flush_en (data_i_flush_en), + .data_i_flush_timeout (data_i_flush_timeout), + .data_i_flush_active (data_i_flush_active), + .data_i_flush_done (data_i_flush_done), + .data_o_flush_en (data_o_flush_en), + .data_o_flush_timeout (data_o_flush_timeout), + .data_o_flush_active (data_o_flush_active), + .data_o_flush_done (data_o_flush_done) + ); + + //--------------------------------------------------------------------------- + // Control Path + //--------------------------------------------------------------------------- + + ctrlport_endpoint #( + .THIS_PORTID (THIS_PORTID ), + .SYNC_CLKS (0 ), + .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN), + .AXIS_CTRL_SLV_EN (CTRLPORT_MST_EN), + .SLAVE_FIFO_SIZE (CTRL_FIFO_SIZE ) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata 
(m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .m_ctrlport_req_wr (m_ctrlport_req_wr ), + .m_ctrlport_req_rd (m_ctrlport_req_rd ), + .m_ctrlport_req_addr (m_ctrlport_req_addr ), + .m_ctrlport_req_data (m_ctrlport_req_data ), + .m_ctrlport_req_byte_en (m_ctrlport_req_byte_en ), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time ), + .m_ctrlport_req_time (m_ctrlport_req_time ), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack ), + .m_ctrlport_resp_status (m_ctrlport_resp_status ), + .m_ctrlport_resp_data (m_ctrlport_resp_data ), + .s_ctrlport_req_wr (s_ctrlport_req_wr ), + .s_ctrlport_req_rd (s_ctrlport_req_rd ), + .s_ctrlport_req_addr (s_ctrlport_req_addr ), + .s_ctrlport_req_portid (s_ctrlport_req_portid ), + .s_ctrlport_req_rem_epid (s_ctrlport_req_rem_epid ), + .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .s_ctrlport_req_data (s_ctrlport_req_data ), + .s_ctrlport_req_byte_en (s_ctrlport_req_byte_en ), + .s_ctrlport_req_has_time (s_ctrlport_req_has_time ), + .s_ctrlport_req_time (s_ctrlport_req_time ), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack ), + .s_ctrlport_resp_status (s_ctrlport_resp_status ), + .s_ctrlport_resp_data (s_ctrlport_resp_data ) + ); + + //--------------------------------------------------------------------------- + // Data Path + //--------------------------------------------------------------------------- + + genvar i; + generate + + for (i = 0; i < NUM_DATA_I; i = i + 1) begin: chdr_to_data + chdr_to_axis_pyld_ctxt #( + .CHDR_W (CHDR_W ), + .ITEM_W (ITEM_W ), + .NIPC (NIPC ), + .SYNC_CLKS (SYNC_CLKS ), + .CONTEXT_FIFO_SIZE (CTXT_FIFO_SIZE), + .PAYLOAD_FIFO_SIZE (PYLD_FIFO_SIZE), + .CONTEXT_PREFETCH_EN (1 ) + ) chdr_to_axis_pyld_ctxt_i ( + .axis_chdr_clk (rfnoc_chdr_clk ), + .axis_chdr_rst (rfnoc_chdr_rst ), + .axis_data_clk (axis_data_clk ), + .axis_data_rst (axis_data_rst ), + .s_axis_chdr_tdata (s_rfnoc_chdr_tdata 
[(i*CHDR_W)+:CHDR_W] ), + .s_axis_chdr_tlast (s_rfnoc_chdr_tlast [i] ), + .s_axis_chdr_tvalid (s_rfnoc_chdr_tvalid [i] ), + .s_axis_chdr_tready (s_rfnoc_chdr_tready [i] ), + .m_axis_payload_tdata (m_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]), + .m_axis_payload_tkeep (m_axis_payload_tkeep [(i*NIPC)+:NIPC] ), + .m_axis_payload_tlast (m_axis_payload_tlast [i] ), + .m_axis_payload_tvalid(m_axis_payload_tvalid[i] ), + .m_axis_payload_tready(m_axis_payload_tready[i] ), + .m_axis_context_tdata (m_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)] ), + .m_axis_context_tuser (m_axis_context_tuser [(i*4)+:4] ), + .m_axis_context_tlast (m_axis_context_tlast [i] ), + .m_axis_context_tvalid(m_axis_context_tvalid[i] ), + .m_axis_context_tready(m_axis_context_tready[i] ), + .flush_en (data_i_flush_en ), + .flush_timeout (data_i_flush_timeout ), + .flush_active (data_i_flush_active [i] ), + .flush_done (data_i_flush_done [i] ) + ); + end + + for (i = 0; i < NUM_DATA_O; i = i + 1) begin: data_to_chdr + axis_pyld_ctxt_to_chdr #( + .CHDR_W (CHDR_W ), + .ITEM_W (ITEM_W ), + .NIPC (NIPC ), + .SYNC_CLKS (SYNC_CLKS ), + .CONTEXT_FIFO_SIZE (CTXT_FIFO_SIZE), + .PAYLOAD_FIFO_SIZE (PYLD_FIFO_SIZE), + .CONTEXT_PREFETCH_EN (1 ), + .MTU (MTU ) + ) axis_pyld_ctxt_to_chdr_i ( + .axis_chdr_clk (rfnoc_chdr_clk ), + .axis_chdr_rst (rfnoc_chdr_rst ), + .axis_data_clk (axis_data_clk ), + .axis_data_rst (axis_data_rst ), + .m_axis_chdr_tdata (m_rfnoc_chdr_tdata [(i*CHDR_W)+:CHDR_W] ), + .m_axis_chdr_tlast (m_rfnoc_chdr_tlast [i] ), + .m_axis_chdr_tvalid (m_rfnoc_chdr_tvalid [i] ), + .m_axis_chdr_tready (m_rfnoc_chdr_tready [i] ), + .s_axis_payload_tdata (s_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]), + .s_axis_payload_tkeep (s_axis_payload_tkeep [(i*NIPC)+:NIPC] ), + .s_axis_payload_tlast (s_axis_payload_tlast [i] ), + .s_axis_payload_tvalid(s_axis_payload_tvalid[i] ), + .s_axis_payload_tready(s_axis_payload_tready[i] ), + .s_axis_context_tdata (s_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)] ), 
+ .s_axis_context_tuser (s_axis_context_tuser [(i*4)+:4] ), + .s_axis_context_tlast (s_axis_context_tlast [i] ), + .s_axis_context_tvalid(s_axis_context_tvalid[i] ), + .s_axis_context_tready(s_axis_context_tready[i] ), + .framer_errors ( ), + .flush_en (data_o_flush_en ), + .flush_timeout (data_o_flush_timeout ), + .flush_active (data_o_flush_active [i] ), + .flush_done (data_o_flush_done [i] ) + ); + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft.v new file mode 100644 index 000000000..76ae37524 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft.v @@ -0,0 +1,559 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_fft +// +// Description: An FFT block for RFNoC. +// +// Parameters: +// +// THIS_PORTID : Control crossbar port to which this block is connected +// CHDR_W : AXIS CHDR interface data width +// MTU : Maximum transmission unit (i.e., maximum packet size) in +// CHDR words is 2**MTU. 
+// EN_MAGNITUDE_OUT : CORDIC based magnitude calculation +// EN_MAGNITUDE_APPROX_OUT : Multiplier-less, lower resource usage +// EN_MAGNITUDE_SQ_OUT : Magnitude squared +// EN_FFT_SHIFT : Center zero frequency bin +// + +module rfnoc_block_fft #( + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter MTU = 10, + + parameter EN_MAGNITUDE_OUT = 0, + parameter EN_MAGNITUDE_APPROX_OUT = 1, + parameter EN_MAGNITUDE_SQ_OUT = 1, + parameter EN_FFT_SHIFT = 1 + ) +( + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + input wire ce_clk, + + // CHDR inputs from framework + input wire [CHDR_W-1:0] s_rfnoc_chdr_tdata, + input wire s_rfnoc_chdr_tlast, + input wire s_rfnoc_chdr_tvalid, + output wire s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [CHDR_W-1:0] m_rfnoc_chdr_tdata, + output wire m_rfnoc_chdr_tlast, + output wire m_rfnoc_chdr_tvalid, + input wire m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready +); + + // These are the only supported values for now + localparam ITEM_W = 32; + localparam NIPC = 1; + + localparam NOC_ID = 32'hFF70_0000; + + `include "../../core/rfnoc_axis_ctrl_utils.vh" + +
//--------------------------------------------------------------------------- + // Signal Declarations + //--------------------------------------------------------------------------- + + wire rfnoc_chdr_rst; + + wire ctrlport_req_wr; + wire ctrlport_req_rd; + wire [19:0] ctrlport_req_addr; + wire [31:0] ctrlport_req_data; + wire ctrlport_req_has_time; + wire [63:0] ctrlport_req_time; + wire ctrlport_resp_ack; + wire [31:0] ctrlport_resp_data; + + wire [ITEM_W-1:0] axis_to_fft_tdata; + wire axis_to_fft_tlast; + wire axis_to_fft_tvalid; + wire axis_to_fft_tready; + + wire [ITEM_W-1:0] axis_from_fft_tdata; + wire axis_from_fft_tlast; + wire axis_from_fft_tvalid; + wire axis_from_fft_tready; + + wire [CHDR_W-1:0] m_axis_context_tdata; + wire [ 3:0] m_axis_context_tuser; + wire [ 0:0] m_axis_context_tlast; + wire [ 0:0] m_axis_context_tvalid; + wire [ 0:0] m_axis_context_tready; + + wire [CHDR_W-1:0] s_axis_context_tdata; + wire [ 3:0] s_axis_context_tuser; + wire [ 0:0] s_axis_context_tlast; + wire [ 0:0] s_axis_context_tvalid; + wire [ 0:0] s_axis_context_tready; + + wire ce_rst; + + // Cross the CHDR reset to the ce_clk domain + pulse_synchronizer #( + .MODE ("POSEDGE") + ) ctrl_rst_sync_i ( + .clk_a (rfnoc_chdr_clk), + .rst_a (1'b0), + .pulse_a (rfnoc_chdr_rst), + .busy_a (), + .clk_b (ce_clk), + .pulse_b (ce_rst) + ); + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + noc_shell_fft #( + .NOC_ID (NOC_ID ), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W ), + .CTRLPORT_SLV_EN(0 ), + .CTRLPORT_MST_EN(1 ), + .SYNC_CLKS (0 ), + .NUM_DATA_I (1 ), + .NUM_DATA_O (1 ), + .ITEM_W (ITEM_W ), + .NIPC (NIPC ), + .PYLD_FIFO_SIZE (MTU ), + .CTXT_FIFO_SIZE (1 ), + .MTU (MTU ) + ) noc_shell_fft_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk ), + .rfnoc_chdr_rst (rfnoc_chdr_rst ), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst ( ), + .rfnoc_core_config
(rfnoc_core_config ), + .rfnoc_core_status (rfnoc_core_status ), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata ), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast ), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid ), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready ), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata ), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast ), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid ), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .ctrlport_clk (ce_clk ), + .ctrlport_rst (ce_rst ), + .m_ctrlport_req_wr (ctrlport_req_wr ), + .m_ctrlport_req_rd (ctrlport_req_rd ), + .m_ctrlport_req_addr (ctrlport_req_addr ), + .m_ctrlport_req_data (ctrlport_req_data ), + .m_ctrlport_req_byte_en ( ), + .m_ctrlport_req_has_time (ctrlport_req_has_time), + .m_ctrlport_req_time (ctrlport_req_time ), + .m_ctrlport_resp_ack (ctrlport_resp_ack ), + .m_ctrlport_resp_status (AXIS_CTRL_STS_OKAY ), + .m_ctrlport_resp_data (ctrlport_resp_data ), + .s_ctrlport_req_wr (1'b0 ), + .s_ctrlport_req_rd (1'b0 ), + .s_ctrlport_req_addr (20'b0 ), + .s_ctrlport_req_portid (10'b0 ), + .s_ctrlport_req_rem_epid (16'b0 ), + .s_ctrlport_req_rem_portid(10'b0 ), + .s_ctrlport_req_data (32'b0 ), + .s_ctrlport_req_byte_en (4'b0 ), + .s_ctrlport_req_has_time (1'b0 ), + .s_ctrlport_req_time (64'b0 ), + .s_ctrlport_resp_ack ( ), + .s_ctrlport_resp_status ( ), + .s_ctrlport_resp_data ( ), + .axis_data_clk (ce_clk ), + .axis_data_rst (ce_rst ), + .m_axis_payload_tdata (axis_to_fft_tdata ), + .m_axis_payload_tkeep ( ), + .m_axis_payload_tlast (axis_to_fft_tlast ), + .m_axis_payload_tvalid (axis_to_fft_tvalid ), + .m_axis_payload_tready (axis_to_fft_tready ), + 
.s_axis_payload_tdata (axis_from_fft_tdata ), + .s_axis_payload_tkeep ({1*NIPC{1'b1}} ), + .s_axis_payload_tlast (axis_from_fft_tlast ), + .s_axis_payload_tvalid (axis_from_fft_tvalid ), + .s_axis_payload_tready (axis_from_fft_tready ), + .m_axis_context_tdata (m_axis_context_tdata ), + .m_axis_context_tuser (m_axis_context_tuser ), + .m_axis_context_tlast (m_axis_context_tlast ), + .m_axis_context_tvalid (m_axis_context_tvalid), + .m_axis_context_tready (m_axis_context_tready), + .s_axis_context_tdata (s_axis_context_tdata ), + .s_axis_context_tuser (s_axis_context_tuser ), + .s_axis_context_tlast (s_axis_context_tlast ), + .s_axis_context_tvalid (s_axis_context_tvalid), + .s_axis_context_tready (s_axis_context_tready) + ); + + // The input packets are the same configuration as the output packets, so + // just use the header information of each incoming packet to create the + // header for each outgoing packet. This is done by connecting + // m_axis_context directly to s_axis_context. + assign s_axis_context_tdata = m_axis_context_tdata; + assign s_axis_context_tuser = m_axis_context_tuser; + assign s_axis_context_tlast = m_axis_context_tlast; + assign s_axis_context_tvalid = m_axis_context_tvalid; + assign m_axis_context_tready = s_axis_context_tready; + + wire [ 8-1:0] set_addr; + wire [32-1:0] set_data; + wire set_has_time; + wire set_stb; + wire [ 8-1:0] rb_addr; + reg [64-1:0] rb_data; + + ctrlport_to_settings_bus # ( + .NUM_PORTS (1) + ) ctrlport_to_settings_bus_i ( + .ctrlport_clk (ce_clk), + .ctrlport_rst (ce_rst), + .s_ctrlport_req_wr (ctrlport_req_wr), + .s_ctrlport_req_rd (ctrlport_req_rd), + .s_ctrlport_req_addr (ctrlport_req_addr), + .s_ctrlport_req_data (ctrlport_req_data), + .s_ctrlport_req_has_time (ctrlport_req_has_time), + .s_ctrlport_req_time (ctrlport_req_time), + .s_ctrlport_resp_ack (ctrlport_resp_ack), + .s_ctrlport_resp_data (ctrlport_resp_data), + .set_data (set_data), + .set_addr (set_addr), + .set_stb (set_stb), + .set_time (), +
.set_has_time (set_has_time), + .rb_stb (1'b1), + .rb_addr (rb_addr), + .rb_data (rb_data)); + + localparam MAX_FFT_SIZE_LOG2 = 11; + + localparam [31:0] SR_FFT_RESET = 131; + localparam [31:0] SR_FFT_SIZE_LOG2 = 132; + localparam [31:0] SR_MAGNITUDE_OUT = 133; + localparam [31:0] SR_FFT_DIRECTION = 134; + localparam [31:0] SR_FFT_SCALING = 135; + localparam [31:0] SR_FFT_SHIFT_CONFIG = 136; + + // FFT Output + localparam [1:0] COMPLEX_OUT = 0; + localparam [1:0] MAG_OUT = 1; + localparam [1:0] MAG_SQ_OUT = 2; + + // FFT Direction + localparam [0:0] FFT_REVERSE = 0; + localparam [0:0] FFT_FORWARD = 1; + + wire [1:0] magnitude_out; + wire [31:0] fft_data_o_tdata; + wire fft_data_o_tlast; + wire fft_data_o_tvalid; + wire fft_data_o_tready; + wire [15:0] fft_data_o_tuser; + wire [31:0] fft_shift_o_tdata; + wire fft_shift_o_tlast; + wire fft_shift_o_tvalid; + wire fft_shift_o_tready; + wire [31:0] fft_mag_i_tdata, fft_mag_o_tdata, fft_mag_o_tdata_int; + wire fft_mag_i_tlast, fft_mag_o_tlast; + wire fft_mag_i_tvalid, fft_mag_o_tvalid; + wire fft_mag_i_tready, fft_mag_o_tready; + wire [31:0] fft_mag_sq_i_tdata, fft_mag_sq_o_tdata; + wire fft_mag_sq_i_tlast, fft_mag_sq_o_tlast; + wire fft_mag_sq_i_tvalid, fft_mag_sq_o_tvalid; + wire fft_mag_sq_i_tready, fft_mag_sq_o_tready; + wire [31:0] fft_mag_round_i_tdata, fft_mag_round_o_tdata; + wire fft_mag_round_i_tlast, fft_mag_round_o_tlast; + wire fft_mag_round_i_tvalid, fft_mag_round_o_tvalid; + wire fft_mag_round_i_tready, fft_mag_round_o_tready; + + // Settings Registers + wire fft_reset; + setting_reg #( + .my_addr(SR_FFT_RESET), .awidth(8), .width(1)) + sr_fft_reset ( + .clk(ce_clk), .rst(ce_rst), + .strobe(set_stb), .addr(set_addr), .in(set_data), .out(fft_reset), .changed()); + + // Two instances of FFT size register, one for FFT core and one for FFT shift + localparam DEFAULT_FFT_SIZE = 8; // 256 + wire [7:0] fft_size_log2_tdata ,fft_core_size_log2_tdata; + wire fft_size_log2_tvalid, fft_core_size_log2_tvalid, 
fft_size_log2_tready, fft_core_size_log2_tready; + axi_setting_reg #( + .ADDR(SR_FFT_SIZE_LOG2), .AWIDTH(8), .WIDTH(8), .DATA_AT_RESET(DEFAULT_FFT_SIZE), .VALID_AT_RESET(1)) + sr_fft_size_log2 ( + .clk(ce_clk), .reset(ce_rst), + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .o_tdata(fft_size_log2_tdata), .o_tlast(), .o_tvalid(fft_size_log2_tvalid), .o_tready(fft_size_log2_tready)); + + axi_setting_reg #( + .ADDR(SR_FFT_SIZE_LOG2), .AWIDTH(8), .WIDTH(8), .DATA_AT_RESET(DEFAULT_FFT_SIZE), .VALID_AT_RESET(1)) + sr_fft_size_log2_2 ( + .clk(ce_clk), .reset(ce_rst), + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .o_tdata(fft_core_size_log2_tdata), .o_tlast(), .o_tvalid(fft_core_size_log2_tvalid), .o_tready(fft_core_size_log2_tready)); + + // Reverse = 0, Forward = 1 (matches the FFT_REVERSE / FFT_FORWARD localparams) + // NOTE(review): the reset default of 0 therefore selects the reverse transform; confirm this is intended + localparam DEFAULT_FFT_DIRECTION = 0; + wire fft_direction_tdata; + wire fft_direction_tvalid, fft_direction_tready; + axi_setting_reg #( + .ADDR(SR_FFT_DIRECTION), .AWIDTH(8), .WIDTH(1), .DATA_AT_RESET(DEFAULT_FFT_DIRECTION), .VALID_AT_RESET(1)) + sr_fft_direction ( + .clk(ce_clk), .reset(ce_rst), + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .o_tdata(fft_direction_tdata), .o_tlast(), .o_tvalid(fft_direction_tvalid), .o_tready(fft_direction_tready)); + + localparam [11:0] DEFAULT_FFT_SCALING = 12'b011010101010; // Conservative 1/N scaling + wire [11:0] fft_scaling_tdata; + wire fft_scaling_tvalid, fft_scaling_tready; + axi_setting_reg #( + .ADDR(SR_FFT_SCALING), .AWIDTH(8), .WIDTH(12), .DATA_AT_RESET(DEFAULT_FFT_SCALING), .VALID_AT_RESET(1)) + sr_fft_scaling ( + .clk(ce_clk), .reset(ce_rst), + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .o_tdata(fft_scaling_tdata), .o_tlast(), .o_tvalid(fft_scaling_tvalid), .o_tready(fft_scaling_tready)); + + wire [1:0] fft_shift_config_tdata; + wire fft_shift_config_tvalid, fft_shift_config_tready; + axi_setting_reg #( + .ADDR(SR_FFT_SHIFT_CONFIG), .AWIDTH(8), .WIDTH(2)) + sr_fft_shift_config ( +
.clk(ce_clk), .reset(ce_rst), + .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data), + .o_tdata(fft_shift_config_tdata), .o_tlast(), .o_tvalid(fft_shift_config_tvalid), .o_tready(fft_shift_config_tready)); + + // Synchronize writing configuration to the FFT core + reg fft_config_ready; + wire fft_config_write = fft_config_ready & axis_to_fft_tvalid & axis_to_fft_tready; + always @(posedge ce_clk) begin + if (ce_rst | fft_reset) begin + fft_config_ready <= 1'b1; + end else begin + if (fft_config_write) begin + fft_config_ready <= 1'b0; + end else if (axis_to_fft_tlast) begin + fft_config_ready <= 1'b1; + end + end + end + + wire [23:0] fft_config_tdata = {3'd0, fft_scaling_tdata, fft_direction_tdata, fft_core_size_log2_tdata}; + wire fft_config_tvalid = fft_config_write & (fft_scaling_tvalid | fft_direction_tvalid | fft_core_size_log2_tvalid); + wire fft_config_tready; + assign fft_core_size_log2_tready = fft_config_tready & fft_config_write; + assign fft_direction_tready = fft_config_tready & fft_config_write; + assign fft_scaling_tready = fft_config_tready & fft_config_write; + axi_fft inst_axi_fft ( + .aclk(ce_clk), .aresetn(~(fft_reset)), + .s_axis_data_tvalid(axis_to_fft_tvalid), + .s_axis_data_tready(axis_to_fft_tready), + .s_axis_data_tlast(axis_to_fft_tlast), + .s_axis_data_tdata({axis_to_fft_tdata[15:0],axis_to_fft_tdata[31:16]}), + .m_axis_data_tvalid(fft_data_o_tvalid), + .m_axis_data_tready(fft_data_o_tready), + .m_axis_data_tlast(fft_data_o_tlast), + .m_axis_data_tdata({fft_data_o_tdata[15:0],fft_data_o_tdata[31:16]}), + .m_axis_data_tuser(fft_data_o_tuser), // FFT index + .s_axis_config_tdata(fft_config_tdata), + .s_axis_config_tvalid(fft_config_tvalid), + .s_axis_config_tready(fft_config_tready), + .event_frame_started(), + .event_tlast_unexpected(), + .event_tlast_missing(), + .event_status_channel_halt(), + .event_data_in_channel_halt(), + .event_data_out_channel_halt()); + + // Mux control signals + assign fft_shift_o_tready = 
(magnitude_out == MAG_OUT) ? fft_mag_i_tready : + (magnitude_out == MAG_SQ_OUT) ? fft_mag_sq_i_tready : axis_from_fft_tready; + assign fft_mag_i_tvalid = (magnitude_out == MAG_OUT) ? fft_shift_o_tvalid : 1'b0; + assign fft_mag_i_tlast = (magnitude_out == MAG_OUT) ? fft_shift_o_tlast : 1'b0; + assign fft_mag_i_tdata = fft_shift_o_tdata; + assign fft_mag_o_tready = (magnitude_out == MAG_OUT) ? fft_mag_round_i_tready : 1'b0; + assign fft_mag_sq_i_tvalid = (magnitude_out == MAG_SQ_OUT) ? fft_shift_o_tvalid : 1'b0; + assign fft_mag_sq_i_tlast = (magnitude_out == MAG_SQ_OUT) ? fft_shift_o_tlast : 1'b0; + assign fft_mag_sq_i_tdata = fft_shift_o_tdata; + assign fft_mag_sq_o_tready = (magnitude_out == MAG_SQ_OUT) ? fft_mag_round_i_tready : 1'b0; + assign fft_mag_round_i_tvalid = (magnitude_out == MAG_OUT) ? fft_mag_o_tvalid : + (magnitude_out == MAG_SQ_OUT) ? fft_mag_sq_o_tvalid : 1'b0; + assign fft_mag_round_i_tlast = (magnitude_out == MAG_OUT) ? fft_mag_o_tlast : + (magnitude_out == MAG_SQ_OUT) ? fft_mag_sq_o_tlast : 1'b0; + assign fft_mag_round_i_tdata = (magnitude_out == MAG_OUT) ? fft_mag_o_tdata : fft_mag_sq_o_tdata; + assign fft_mag_round_o_tready = axis_from_fft_tready; + assign axis_from_fft_tvalid = (magnitude_out == MAG_OUT | magnitude_out == MAG_SQ_OUT) ? fft_mag_round_o_tvalid : fft_shift_o_tvalid; + assign axis_from_fft_tlast = (magnitude_out == MAG_OUT | magnitude_out == MAG_SQ_OUT) ? fft_mag_round_o_tlast : fft_shift_o_tlast; + assign axis_from_fft_tdata = (magnitude_out == MAG_OUT | magnitude_out == MAG_SQ_OUT) ? 
fft_mag_round_o_tdata : fft_shift_o_tdata; + + // Conditionally synth magnitude / magnitude^2 logic + generate + if (EN_MAGNITUDE_OUT | EN_MAGNITUDE_APPROX_OUT | EN_MAGNITUDE_SQ_OUT) begin : generate_magnitude_out + setting_reg #( + .my_addr(SR_MAGNITUDE_OUT), .awidth(8), .width(2)) + sr_magnitude_out ( + .clk(ce_clk), .rst(ce_rst), + .strobe(set_stb), .addr(set_addr), .in(set_data), .out(magnitude_out), .changed()); + end else begin : generate_magnitude_out_else + // Magnitude calculation logic not included, so always bypass + assign magnitude_out = 2'd0; + end + + if (EN_FFT_SHIFT) begin : generate_fft_shift + fft_shift #( + .MAX_FFT_SIZE_LOG2(MAX_FFT_SIZE_LOG2), + .WIDTH(32)) + inst_fft_shift ( + .clk(ce_clk), .reset(ce_rst | fft_reset), + .config_tdata(fft_shift_config_tdata), + .config_tvalid(fft_shift_config_tvalid), + .config_tready(fft_shift_config_tready), + .fft_size_log2_tdata(fft_size_log2_tdata[$clog2(MAX_FFT_SIZE_LOG2)-1:0]), + .fft_size_log2_tvalid(fft_size_log2_tvalid), + .fft_size_log2_tready(fft_size_log2_tready), + .i_tdata(fft_data_o_tdata), + .i_tlast(fft_data_o_tlast), + .i_tvalid(fft_data_o_tvalid), + .i_tready(fft_data_o_tready), + .i_tuser(fft_data_o_tuser[MAX_FFT_SIZE_LOG2-1:0]), + .o_tdata(fft_shift_o_tdata), + .o_tlast(fft_shift_o_tlast), + .o_tvalid(fft_shift_o_tvalid), + .o_tready(fft_shift_o_tready)); + end + else begin : generate_fft_shift_else + assign fft_shift_o_tdata = fft_data_o_tdata; + assign fft_shift_o_tlast = fft_data_o_tlast; + assign fft_shift_o_tvalid = fft_data_o_tvalid; + assign fft_data_o_tready = fft_shift_o_tready; + end + + // More accurate magnitude calculation takes precedence if enabled + if (EN_MAGNITUDE_OUT) begin : generate_complex_to_magphase + complex_to_magphase + inst_complex_to_magphase ( + .aclk(ce_clk), .aresetn(~(ce_rst | fft_reset)), + .s_axis_cartesian_tvalid(fft_mag_i_tvalid), + .s_axis_cartesian_tlast(fft_mag_i_tlast), + .s_axis_cartesian_tready(fft_mag_i_tready), + 
.s_axis_cartesian_tdata(fft_mag_i_tdata), + .m_axis_dout_tvalid(fft_mag_o_tvalid), + .m_axis_dout_tlast(fft_mag_o_tlast), + .m_axis_dout_tdata(fft_mag_o_tdata_int), + .m_axis_dout_tready(fft_mag_o_tready)); + assign fft_mag_o_tdata = {1'b0, fft_mag_o_tdata_int[15:0], 15'd0}; + end + else if (EN_MAGNITUDE_APPROX_OUT) begin : generate_complex_to_mag_approx + complex_to_mag_approx + inst_complex_to_mag_approx ( + .clk(ce_clk), .reset(ce_rst | fft_reset), .clear(1'b0), + .i_tvalid(fft_mag_i_tvalid), + .i_tlast(fft_mag_i_tlast), + .i_tready(fft_mag_i_tready), + .i_tdata(fft_mag_i_tdata), + .o_tvalid(fft_mag_o_tvalid), + .o_tlast(fft_mag_o_tlast), + .o_tready(fft_mag_o_tready), + .o_tdata(fft_mag_o_tdata_int[15:0])); + assign fft_mag_o_tdata = {1'b0, fft_mag_o_tdata_int[15:0], 15'd0}; + end + else begin : generate_complex_to_mag_approx_else + assign fft_mag_o_tdata = fft_mag_i_tdata; + assign fft_mag_o_tlast = fft_mag_i_tlast; + assign fft_mag_o_tvalid = fft_mag_i_tvalid; + assign fft_mag_i_tready = fft_mag_o_tready; + end + + if (EN_MAGNITUDE_SQ_OUT) begin : generate_complex_to_magsq + complex_to_magsq + inst_complex_to_magsq ( + .clk(ce_clk), .reset(ce_rst | fft_reset), .clear(1'b0), + .i_tvalid(fft_mag_sq_i_tvalid), + .i_tlast(fft_mag_sq_i_tlast), + .i_tready(fft_mag_sq_i_tready), + .i_tdata(fft_mag_sq_i_tdata), + .o_tvalid(fft_mag_sq_o_tvalid), + .o_tlast(fft_mag_sq_o_tlast), + .o_tready(fft_mag_sq_o_tready), + .o_tdata(fft_mag_sq_o_tdata)); + end + else begin : generate_complex_to_magsq_else + assign fft_mag_sq_o_tdata = fft_mag_sq_i_tdata; + assign fft_mag_sq_o_tlast = fft_mag_sq_i_tlast; + assign fft_mag_sq_o_tvalid = fft_mag_sq_i_tvalid; + assign fft_mag_sq_i_tready = fft_mag_sq_o_tready; + end + + // Convert to SC16 + if (EN_MAGNITUDE_OUT | EN_MAGNITUDE_APPROX_OUT | EN_MAGNITUDE_SQ_OUT) begin : generate_axi_round_and_clip + axi_round_and_clip #( + .WIDTH_IN(32), + .WIDTH_OUT(16), + .CLIP_BITS(1)) + inst_axi_round_and_clip ( + .clk(ce_clk), .reset(ce_rst | 
fft_reset), + .i_tdata(fft_mag_round_i_tdata), + .i_tlast(fft_mag_round_i_tlast), + .i_tvalid(fft_mag_round_i_tvalid), + .i_tready(fft_mag_round_i_tready), + .o_tdata(fft_mag_round_o_tdata[31:16]), + .o_tlast(fft_mag_round_o_tlast), + .o_tvalid(fft_mag_round_o_tvalid), + .o_tready(fft_mag_round_o_tready)); + // Only the upper 16 bits carry the rounded result; zero the lower 16 bits + assign fft_mag_round_o_tdata[15:0] = 16'd0; + end + else begin : generate_axi_round_and_clip_else + assign fft_mag_round_o_tdata = fft_mag_round_i_tdata; + assign fft_mag_round_o_tlast = fft_mag_round_i_tlast; + assign fft_mag_round_o_tvalid = fft_mag_round_i_tvalid; + assign fft_mag_round_i_tready = fft_mag_round_o_tready; + end + endgenerate + + // Readback registers + // Combinational mux selected by rb_addr: blocking assignments are used and + // every entry is explicitly zero-extended to the 64-bit readback width. + always @* + case(rb_addr) + 3'd0 : rb_data = {63'd0, fft_reset}; + 3'd1 : rb_data = {62'd0, magnitude_out}; + 3'd2 : rb_data = {56'd0, fft_size_log2_tdata}; + 3'd3 : rb_data = {63'd0, fft_direction_tdata}; + 3'd4 : rb_data = {52'd0, fft_scaling_tdata}; + 3'd5 : rb_data = {62'd0, fft_shift_config_tdata}; + default : rb_data = 64'h0BADC0DE0BADC0DE; + endcase + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft_tb.sv new file mode 100644 index 000000000..bb46e3cc7 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fft/rfnoc_block_fft_tb.sv @@ -0,0 +1,263 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_fft_tb +// +// Description: Testbench for rfnoc_block_fft +// + +module rfnoc_block_fft_tb(); + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Simulation parameters
+ localparam real CHDR_CLK_PER = 5.0; // Clock rate + localparam int SPP = 256; // Samples per packet + localparam int PKT_SIZE_BYTES = SPP*4; // Bytes per packet + localparam int STALL_PROB = 25; // BFM stall probability + + // Block configuration + localparam int CHDR_W = 64; + localparam int THIS_PORTID = 'h123; + localparam int MTU = 10; + localparam int NUM_PORTS = 1; + localparam int NUM_HB = 3; + localparam int CIC_MAX_DECIM = 255; + + // FFT specific settings + // FFT settings + localparam [31:0] FFT_SIZE = 256; + localparam [31:0] FFT_SIZE_LOG2 = $clog2(FFT_SIZE); + const logic [31:0] FFT_DIRECTION = DUT.FFT_FORWARD; // Forward + localparam [31:0] FFT_SCALING = 12'b011010101010; // Conservative scaling of 1/N + localparam [31:0] FFT_SHIFT_CONFIG = 0; // Normal FFT shift + localparam FFT_BIN = FFT_SIZE/8 + FFT_SIZE/2; // 1/8 sample rate freq + FFT shift + localparam NUM_ITERATIONS = 10; + + //--------------------------------------------------------------------------- + // Clocks + //--------------------------------------------------------------------------- + + bit rfnoc_chdr_clk; + bit rfnoc_ctrl_clk; + + sim_clock_gen #(CHDR_CLK_PER) rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst()); + sim_clock_gen #(CHDR_CLK_PER) rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst()); + + //--------------------------------------------------------------------------- + // Bus Functional Models + //--------------------------------------------------------------------------- + + RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_ctrl_clk); + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(CHDR_W) m_chdr (rfnoc_chdr_clk, 1'b0); + AxiStreamIf #(CHDR_W) s_chdr (rfnoc_chdr_clk, 1'b0); + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl = + new(backend, m_ctrl, s_ctrl); + + // Connect block controller to BFMs + initial begin + blk_ctrl.connect_master_data_port(0, 
m_chdr, PKT_SIZE_BYTES); + blk_ctrl.connect_slave_data_port(0, s_chdr); + blk_ctrl.set_master_stall_prob(0, STALL_PROB); + blk_ctrl.set_slave_stall_prob(0, STALL_PROB); + end + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + rfnoc_block_fft #( + .THIS_PORTID (0 ), + .CHDR_W (64 ), + .MTU (MTU), + + .EN_MAGNITUDE_OUT (0 ), + .EN_MAGNITUDE_APPROX_OUT(1 ), + .EN_MAGNITUDE_SQ_OUT (1 ), + .EN_FFT_SHIFT (1 ) + ) DUT ( + .rfnoc_chdr_clk (backend.chdr_clk), + .ce_clk (backend.chdr_clk), + .s_rfnoc_chdr_tdata (m_chdr.tdata ), + .s_rfnoc_chdr_tlast (m_chdr.tlast ), + .s_rfnoc_chdr_tvalid(m_chdr.tvalid ), + .s_rfnoc_chdr_tready(m_chdr.tready ), + + .m_rfnoc_chdr_tdata (s_chdr.tdata ), + .m_rfnoc_chdr_tlast (s_chdr.tlast ), + .m_rfnoc_chdr_tvalid(s_chdr.tvalid ), + .m_rfnoc_chdr_tready(s_chdr.tready ), + + .rfnoc_core_config (backend.cfg ), + .rfnoc_core_status (backend.sts ), + .rfnoc_ctrl_clk (backend.ctrl_clk), + + .s_rfnoc_ctrl_tdata (m_ctrl.tdata ), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast ), + .s_rfnoc_ctrl_tvalid(m_ctrl.tvalid ), + .s_rfnoc_ctrl_tready(m_ctrl.tready ), + + .m_rfnoc_ctrl_tdata (s_ctrl.tdata ), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast ), + .m_rfnoc_ctrl_tvalid(s_ctrl.tvalid ), + .m_rfnoc_ctrl_tready(s_ctrl.tready ) + ); + + //--------------------------------------------------------------------------- + // Helper Tasks + //--------------------------------------------------------------------------- + + // Translate the desired register access to a ctrlport write request. + task automatic write_reg(int port, byte addr, bit [31:0] value); + blk_ctrl.reg_write(256*8*port + addr*8, value); + endtask : write_reg + + // Translate the desired register access to a ctrlport read request. 
+ task automatic read_user_reg(int port, byte addr, output logic [63:0] value); + blk_ctrl.reg_read(256*8*port + addr*8 + 0, value[31: 0]); + blk_ctrl.reg_read(256*8*port + addr*8 + 4, value[63:32]); + endtask : read_user_reg + + //--------------------------------------------------------------------------- + // Test Process + //--------------------------------------------------------------------------- + + task automatic send_sine_wave ( + input int unsigned port + ); + // Send a sine wave + fork + begin + chdr_word_t send_payload[$]; + + for (int n = 0; n < NUM_ITERATIONS; n++) begin + for (int i = 0; i < (FFT_SIZE/8); i++) begin + send_payload.push_back({ 16'h5A82, 16'h5A82, 16'h7FFF, 16'h0000}); + send_payload.push_back({-16'h5A82, 16'h5A82, 16'h0000, 16'h7FFF}); + send_payload.push_back({-16'h5A82,-16'h5A82,-16'h7FFF, 16'h0000}); + send_payload.push_back({ 16'h5A82,-16'h5A82, 16'h0000,-16'h7FFF}); + end + + blk_ctrl.send(port, send_payload); + blk_ctrl.wait_complete(port); + send_payload = {}; + end + end + + begin + string s; + chdr_word_t recv_payload[$], temp_payload[$]; + int data_bytes; + logic [15:0] real_val; + logic [15:0] cplx_val; + + for (int n = 0; n < NUM_ITERATIONS; n++) begin + blk_ctrl.recv(port, recv_payload, data_bytes); + + `ASSERT_ERROR(recv_payload.size * 2 == FFT_SIZE, "received wrong amount of data"); + + for (int k = 0; k < FFT_SIZE/2; k++) begin + chdr_word_t payload_word; + payload_word = recv_payload.pop_front(); + + for (int i = 0; i < 2; i++) begin + {real_val, cplx_val} = payload_word; + payload_word = payload_word[63:32]; + + if (2*k+i == FFT_BIN) begin + // Assert that for the special case of a 1/8th sample rate sine wave input, + // the real part of the corresponding 1/8th sample rate FFT bin should always be greater than 0 and + // the complex part equal to 0. 
+ + `ASSERT_ERROR(real_val > 32'd0, "FFT bin real part is not greater than 0!"); + `ASSERT_ERROR(cplx_val == 32'd0, "FFT bin complex part is not 0!"); + end else begin + // Assert all other FFT bins should be 0 for both complex and real parts + `ASSERT_ERROR(real_val == 32'd0, "FFT bin real part is not 0!"); + `ASSERT_ERROR(cplx_val == 32'd0, "FFT bin complex part is not 0!"); + end + end + end + end + end + join + endtask + + initial begin : tb_main + const int port = 0; + test.start_tb("rfnoc_block_fft_tb"); + + // Start the BFMs running + blk_ctrl.run(); + + //------------------------------------------------------------------------- + // Reset + //------------------------------------------------------------------------- + + test.start_test("Wait for Reset", 10us); + fork + blk_ctrl.reset_chdr(); + blk_ctrl.reset_ctrl(); + join; + test.end_test(); + + + //------------------------------------------------------------------------- + // Check NoC ID and Block Info + //------------------------------------------------------------------------- + + test.start_test("Verify Block Info", 2us); + `ASSERT_ERROR(blk_ctrl.get_noc_id() == DUT.NOC_ID, "Incorrect NOC_ID Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect NUM_DATA_I Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect NUM_DATA_O Value"); + `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect MTU Value"); + test.end_test(); + + //------------------------------------------------------------------------- + // Setup FFT + //------------------------------------------------------------------------- + + test.start_test("Setup FFT", 10us); + write_reg(port, DUT.SR_FFT_SIZE_LOG2, FFT_SIZE_LOG2); + write_reg(port, DUT.SR_FFT_DIRECTION, FFT_DIRECTION); + write_reg(port, DUT.SR_FFT_SCALING, FFT_SCALING); + write_reg(port, DUT.SR_FFT_SHIFT_CONFIG, FFT_SHIFT_CONFIG); + write_reg(port, DUT.SR_MAGNITUDE_OUT, DUT.COMPLEX_OUT); // Enable real/imag out + test.end_test(); + + 
//------------------------------------------------------------------------- + // Test sine wave + //------------------------------------------------------------------------- + + test.start_test("Test sine wave", 20us); + send_sine_wave (port); + test.end_test(); + + //------------------------------------------------------------------------- + // Finish + //------------------------------------------------------------------------- + + // End the TB, but don't $finish, since we don't want to kill other + // instances of this testbench that may be running. + test.end_tb(0); + + // Kill the clocks to end this instance of the testbench + rfnoc_chdr_clk_gen.kill(); + rfnoc_ctrl_clk_gen.kill(); + end +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile new file mode 100644 index 000000000..7d6d84f82 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile @@ -0,0 +1,46 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_OOT_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one 
toplevel module +SIM_TOP = rfnoc_block_fir_filter_tb + +# Add test bench, user design under test, and +# additional user created files +SIM_SRCS = \ +$(abspath rfnoc_block_fir_filter_tb.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile.srcs new file mode 100644 index 000000000..f8c696096 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/Makefile.srcs @@ -0,0 +1,12 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +RFNOC_OOT_SRCS += $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_fir_filter/, \ +noc_shell_fir_filter.v \ +rfnoc_fir_filter_regs.vh \ +rfnoc_fir_filter_core.v \ +rfnoc_block_fir_filter.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/noc_shell_fir_filter.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/noc_shell_fir_filter.v new file mode 100644 index 000000000..ce9a66fd9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/noc_shell_fir_filter.v @@ -0,0 +1,297 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: noc_shell_fir_filter +// +// Description: A NoC Shell for RFNoC. This should eventually be replaced +// by an auto-generated NoC Shell. 
+// + +module noc_shell_fir_filter #( + parameter [31:0] NOC_ID = 32 'h0, + parameter [ 9:0] THIS_PORTID = 10 'd0, + parameter CHDR_W = 64, + parameter [ 0:0] CTRLPORT_SLV_EN = 1, + parameter [ 0:0] CTRLPORT_MST_EN = 1, + parameter SYNC_CLKS = 0, + parameter [ 5:0] NUM_DATA_I = 1, + parameter [ 5:0] NUM_DATA_O = 1, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter PYLD_FIFO_SIZE = 5, + parameter CTXT_FIFO_SIZE = 5, + parameter MTU = 10 +) ( + //--------------------------------------------------------------------------- + // Framework Interface + //--------------------------------------------------------------------------- + + // RFNoC Framework Clocks and Resets + input wire rfnoc_chdr_clk, + output wire rfnoc_chdr_rst, + input wire rfnoc_ctrl_clk, + output wire rfnoc_ctrl_rst, + // RFNoC Backend Interface + input wire [ 511:0] rfnoc_core_config, + output wire [ 511:0] rfnoc_core_status, + // CHDR Input Ports (from framework) + input wire [(CHDR_W*NUM_DATA_I)-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tready, + // CHDR Output Ports (to framework) + output wire [(CHDR_W*NUM_DATA_O)-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tready, + // AXIS-Ctrl Input Port (from framework) + input wire [ 31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + // AXIS-Ctrl Output Port (to framework) + output wire [ 31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + //--------------------------------------------------------------------------- + // Client Control Port Interface + //--------------------------------------------------------------------------- + + // 
Clock + input wire ctrlport_clk, + input wire ctrlport_rst, + // Master + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Client Data Interface + //--------------------------------------------------------------------------- + + // Clock + input wire axis_data_clk, + input wire axis_data_rst, + + // Output data stream (to user logic) + output wire [(NUM_DATA_I*ITEM_W*NIPC)-1:0] m_axis_payload_tdata, + output wire [ (NUM_DATA_I*NIPC)-1:0] m_axis_payload_tkeep, + output wire [ NUM_DATA_I-1:0] m_axis_payload_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_payload_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_payload_tready, + + // Input data stream (from user logic) + input wire [(NUM_DATA_O*ITEM_W*NIPC)-1:0] s_axis_payload_tdata, + input wire [ (NUM_DATA_O*NIPC)-1:0] s_axis_payload_tkeep, + input wire [ NUM_DATA_O-1:0] s_axis_payload_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_payload_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_payload_tready, + + // Output context stream (to user logic) + output wire 
[(NUM_DATA_I*CHDR_W)-1:0] m_axis_context_tdata, + output wire [ (4*NUM_DATA_I)-1:0] m_axis_context_tuser, + output wire [ NUM_DATA_I-1:0] m_axis_context_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_context_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_context_tready, + + // Input context stream (from user logic) + input wire [(NUM_DATA_O*CHDR_W)-1:0] s_axis_context_tdata, + input wire [ (4*NUM_DATA_O)-1:0] s_axis_context_tuser, + input wire [ NUM_DATA_O-1:0] s_axis_context_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_context_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_context_tready +); + + localparam CTRL_FIFO_SIZE = 5; + + + //--------------------------------------------------------------------------- + // Backend Interface + //--------------------------------------------------------------------------- + + wire data_i_flush_en; + wire [31:0] data_i_flush_timeout; + wire [63:0] data_i_flush_active; + wire [63:0] data_i_flush_done; + wire data_o_flush_en; + wire [31:0] data_o_flush_timeout; + wire [63:0] data_o_flush_active; + wire [63:0] data_o_flush_done; + + backend_iface #( + .NOC_ID (NOC_ID), + .NUM_DATA_I (NUM_DATA_I), + .NUM_DATA_O (NUM_DATA_O), + .CTRL_FIFOSIZE (CTRL_FIFO_SIZE), + .MTU (MTU) + ) backend_iface_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst), + .data_i_flush_en (data_i_flush_en), + .data_i_flush_timeout (data_i_flush_timeout), + .data_i_flush_active (data_i_flush_active), + .data_i_flush_done (data_i_flush_done), + .data_o_flush_en (data_o_flush_en), + .data_o_flush_timeout (data_o_flush_timeout), + .data_o_flush_active (data_o_flush_active), + .data_o_flush_done (data_o_flush_done) + ); + + //--------------------------------------------------------------------------- + // Control Path + 
//--------------------------------------------------------------------------- + + ctrlport_endpoint #( + .THIS_PORTID (THIS_PORTID ), + .SYNC_CLKS (0 ), + .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN), + .AXIS_CTRL_SLV_EN (CTRLPORT_MST_EN), + .SLAVE_FIFO_SIZE (CTRL_FIFO_SIZE ) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .m_ctrlport_req_wr (m_ctrlport_req_wr ), + .m_ctrlport_req_rd (m_ctrlport_req_rd ), + .m_ctrlport_req_addr (m_ctrlport_req_addr ), + .m_ctrlport_req_data (m_ctrlport_req_data ), + .m_ctrlport_req_byte_en (m_ctrlport_req_byte_en ), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time ), + .m_ctrlport_req_time (m_ctrlport_req_time ), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack ), + .m_ctrlport_resp_status (m_ctrlport_resp_status ), + .m_ctrlport_resp_data (m_ctrlport_resp_data ), + .s_ctrlport_req_wr (s_ctrlport_req_wr ), + .s_ctrlport_req_rd (s_ctrlport_req_rd ), + .s_ctrlport_req_addr (s_ctrlport_req_addr ), + .s_ctrlport_req_portid (s_ctrlport_req_portid ), + .s_ctrlport_req_rem_epid (s_ctrlport_req_rem_epid ), + .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .s_ctrlport_req_data (s_ctrlport_req_data ), + .s_ctrlport_req_byte_en (s_ctrlport_req_byte_en ), + .s_ctrlport_req_has_time (s_ctrlport_req_has_time ), + .s_ctrlport_req_time (s_ctrlport_req_time ), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack ), + .s_ctrlport_resp_status (s_ctrlport_resp_status ), + .s_ctrlport_resp_data (s_ctrlport_resp_data ) + ); + + 
//--------------------------------------------------------------------------- + // Data Path + //--------------------------------------------------------------------------- + + genvar i; + generate + + for (i = 0; i < NUM_DATA_I; i = i + 1) begin: chdr_to_data + chdr_to_axis_pyld_ctxt #( + .CHDR_W (CHDR_W ), + .ITEM_W (ITEM_W ), + .NIPC (NIPC ), + .SYNC_CLKS (SYNC_CLKS ), + .CONTEXT_FIFO_SIZE (CTXT_FIFO_SIZE), + .PAYLOAD_FIFO_SIZE (PYLD_FIFO_SIZE), + .CONTEXT_PREFETCH_EN (1 ) + ) chdr_to_axis_pyld_ctxt_i ( + .axis_chdr_clk (rfnoc_chdr_clk ), + .axis_chdr_rst (rfnoc_chdr_rst ), + .axis_data_clk (axis_data_clk ), + .axis_data_rst (axis_data_rst ), + .s_axis_chdr_tdata (s_rfnoc_chdr_tdata [(i*CHDR_W)+:CHDR_W] ), + .s_axis_chdr_tlast (s_rfnoc_chdr_tlast [i] ), + .s_axis_chdr_tvalid (s_rfnoc_chdr_tvalid [i] ), + .s_axis_chdr_tready (s_rfnoc_chdr_tready [i] ), + .m_axis_payload_tdata (m_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]), + .m_axis_payload_tkeep (m_axis_payload_tkeep [(i*NIPC)+:NIPC] ), + .m_axis_payload_tlast (m_axis_payload_tlast [i] ), + .m_axis_payload_tvalid(m_axis_payload_tvalid[i] ), + .m_axis_payload_tready(m_axis_payload_tready[i] ), + .m_axis_context_tdata (m_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)] ), + .m_axis_context_tuser (m_axis_context_tuser [(i*4)+:4] ), + .m_axis_context_tlast (m_axis_context_tlast [i] ), + .m_axis_context_tvalid(m_axis_context_tvalid[i] ), + .m_axis_context_tready(m_axis_context_tready[i] ), + .flush_en (data_i_flush_en ), + .flush_timeout (data_i_flush_timeout ), + .flush_active (data_i_flush_active [i] ), + .flush_done (data_i_flush_done [i] ) + ); + end + + for (i = 0; i < NUM_DATA_O; i = i + 1) begin: data_to_chdr + axis_pyld_ctxt_to_chdr #( + .CHDR_W (CHDR_W ), + .ITEM_W (ITEM_W ), + .NIPC (NIPC ), + .SYNC_CLKS (SYNC_CLKS ), + .CONTEXT_FIFO_SIZE (CTXT_FIFO_SIZE), + .PAYLOAD_FIFO_SIZE (PYLD_FIFO_SIZE), + .CONTEXT_PREFETCH_EN (1 ), + .MTU (MTU ) + ) axis_pyld_ctxt_to_chdr_i ( + .axis_chdr_clk (rfnoc_chdr_clk ), + 
.axis_chdr_rst (rfnoc_chdr_rst ), + .axis_data_clk (axis_data_clk ), + .axis_data_rst (axis_data_rst ), + .m_axis_chdr_tdata (m_rfnoc_chdr_tdata [(i*CHDR_W)+:CHDR_W] ), + .m_axis_chdr_tlast (m_rfnoc_chdr_tlast [i] ), + .m_axis_chdr_tvalid (m_rfnoc_chdr_tvalid [i] ), + .m_axis_chdr_tready (m_rfnoc_chdr_tready [i] ), + .s_axis_payload_tdata (s_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]), + .s_axis_payload_tkeep (s_axis_payload_tkeep [(i*NIPC)+:NIPC] ), + .s_axis_payload_tlast (s_axis_payload_tlast [i] ), + .s_axis_payload_tvalid(s_axis_payload_tvalid[i] ), + .s_axis_payload_tready(s_axis_payload_tready[i] ), + .s_axis_context_tdata (s_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)] ), + .s_axis_context_tuser (s_axis_context_tuser [(i*4)+:4] ), + .s_axis_context_tlast (s_axis_context_tlast [i] ), + .s_axis_context_tvalid(s_axis_context_tvalid[i] ), + .s_axis_context_tready(s_axis_context_tready[i] ), + .framer_errors ( ), + .flush_en (data_o_flush_en ), + .flush_timeout (data_o_flush_timeout ), + .flush_active (data_o_flush_active [i] ), + .flush_done (data_o_flush_done [i] ) + ); + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter.v new file mode 100644 index 000000000..f007049cc --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter.v @@ -0,0 +1,343 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Description: +// +// Parameterized FIR filter RFNoC block with optional re-loadable +// coefficients. +// +// It has several optimizations for resource utilization such as using half +// the number of DSP slices for symmetric coefficients, skipping coefficients +// that are always set to zero, and using internal DSP slice registers to +// hold coefficients. 
+// +// For the most efficient DSP slice inference, set COEFF_WIDTH to be less +// than 18. +// +// Parameters: +// +// COEFF_WIDTH : Coefficient width +// +// NUM_COEFFS : Number of coefficients / filter taps +// +// COEFFS_VEC : Vector of NUM_COEFFS values, each of width +// COEFF_WIDTH, to initialize the filter +// coefficients. Defaults to an impulse. +// +// RELOADABLE_COEFFS : Enable (1) or disable (0) reloading +// coefficients at runtime +// +// SYMMETRIC_COEFFS : Reduce multiplier usage by approximately half +// if coefficients are symmetric +// +// SKIP_ZERO_COEFFS : Reduce multiplier usage by assuming zero valued +// coefficients in COEFFS_VEC are always zero. +// Useful for halfband filters. +// +// USE_EMBEDDED_REGS_COEFFS : Reduce register usage by only using embedded +// registers in DSP slices. Updating taps while +// streaming will cause temporary output +// corruption! +// +// Note: If using USE_EMBEDDED_REGS_COEFFS, coefficients must be written at +// least once since COEFFS_VEC is ignored! 
+// + + +module rfnoc_block_fir_filter #( + // RFNoC Parameters + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter NUM_PORTS = 2, + parameter MTU = 10, + // FIR Filter Parameters + parameter COEFF_WIDTH = 16, + parameter NUM_COEFFS = 41, + parameter [NUM_COEFFS*COEFF_WIDTH-1:0] COEFFS_VEC = // Make impulse by default + { + {1'b0, {(COEFF_WIDTH-1){1'b1}} }, // Max positive value + {(COEFF_WIDTH*(NUM_COEFFS-1)){1'b0}} // Zero for remaining coefficients + }, + parameter RELOADABLE_COEFFS = 1, + parameter SYMMETRIC_COEFFS = 0, + parameter SKIP_ZERO_COEFFS = 0, + parameter USE_EMBEDDED_REGS_COEFFS = 1 +)( + // Clock to use for signal processing + input wire ce_clk, + + + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + + // CHDR inputs from framework + input wire [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire 
m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready +); + + `include "rfnoc_fir_filter_regs.vh" + + // These are the only supported values for now + localparam ITEM_W = 32; + localparam NIPC = 1; + + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + wire ctrlport_reg_req_wr; + wire ctrlport_reg_req_rd; + wire [19:0] ctrlport_reg_req_addr; + wire [31:0] ctrlport_reg_req_data; + wire ctrlport_reg_resp_ack; + wire [ 1:0] ctrlport_reg_resp_status; + wire [31:0] ctrlport_reg_resp_data; + + wire [(NUM_PORTS*ITEM_W*NIPC)-1:0] axis_to_fir_tdata; + wire [ NUM_PORTS-1:0] axis_to_fir_tlast; + wire [ NUM_PORTS-1:0] axis_to_fir_tvalid; + wire [ NUM_PORTS-1:0] axis_to_fir_tready; + + wire [(NUM_PORTS*ITEM_W*NIPC)-1:0] axis_from_fir_tdata; + wire [ NUM_PORTS-1:0] axis_from_fir_tlast; + wire [ NUM_PORTS-1:0] axis_from_fir_tvalid; + wire [ NUM_PORTS-1:0] axis_from_fir_tready; + + wire [(NUM_PORTS*CHDR_W)-1:0] m_axis_context_tdata; + wire [ (4*NUM_PORTS)-1:0] m_axis_context_tuser; + wire [ NUM_PORTS-1:0] m_axis_context_tlast; + wire [ NUM_PORTS-1:0] m_axis_context_tvalid; + wire [ NUM_PORTS-1:0] m_axis_context_tready; + + wire [(NUM_PORTS*CHDR_W)-1:0] s_axis_context_tdata; + wire [ (4*NUM_PORTS)-1:0] s_axis_context_tuser; + wire [ NUM_PORTS-1:0] s_axis_context_tlast; + wire [ NUM_PORTS-1:0] s_axis_context_tvalid; + wire [ NUM_PORTS-1:0] s_axis_context_tready; + + wire rfnoc_chdr_rst; + wire ce_rst; + + localparam NOC_ID = 32'hF112_0000; + + + // Cross the CHDR reset to the ce_clk domain + synchronizer ce_rst_sync_i ( + .clk (ce_clk), + .rst (1'b0), + .in (rfnoc_chdr_rst), + .out (ce_rst) + ); + + + noc_shell_fir_filter #( + .NOC_ID (NOC_ID), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .CTRLPORT_SLV_EN (0), + .CTRLPORT_MST_EN (1), + .NUM_DATA_I (NUM_PORTS), + .NUM_DATA_O (NUM_PORTS), + .ITEM_W (ITEM_W), + .NIPC 
(NIPC), + .PYLD_FIFO_SIZE (5), + .CTXT_FIFO_SIZE (5), + .MTU (MTU) + ) noc_shell_fir_filter_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_ctrl_rst (), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready), + .ctrlport_clk (ce_clk), + .ctrlport_rst (ce_rst), + .m_ctrlport_req_wr (ctrlport_reg_req_wr), + .m_ctrlport_req_rd (ctrlport_reg_req_rd), + .m_ctrlport_req_addr (ctrlport_reg_req_addr), + .m_ctrlport_req_data (ctrlport_reg_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (ctrlport_reg_resp_ack), + .m_ctrlport_resp_status (ctrlport_reg_resp_status), + .m_ctrlport_resp_data (ctrlport_reg_resp_data), + .s_ctrlport_req_wr (1'b0), + .s_ctrlport_req_rd (1'b0), + .s_ctrlport_req_addr (20'b0), + .s_ctrlport_req_portid (10'b0), + .s_ctrlport_req_rem_epid (16'b0), + .s_ctrlport_req_rem_portid (10'b0), + .s_ctrlport_req_data (32'b0), + .s_ctrlport_req_byte_en (4'hF), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .axis_data_clk (ce_clk), + .axis_data_rst (ce_rst), + .m_axis_payload_tdata 
(axis_to_fir_tdata), + .m_axis_payload_tkeep (), + .m_axis_payload_tlast (axis_to_fir_tlast), + .m_axis_payload_tvalid (axis_to_fir_tvalid), + .m_axis_payload_tready (axis_to_fir_tready), + .s_axis_payload_tdata (axis_from_fir_tdata), + .s_axis_payload_tkeep ({NUM_PORTS*NIPC{1'b1}}), + .s_axis_payload_tlast (axis_from_fir_tlast), + .s_axis_payload_tvalid (axis_from_fir_tvalid), + .s_axis_payload_tready (axis_from_fir_tready), + .m_axis_context_tdata (m_axis_context_tdata), + .m_axis_context_tuser (m_axis_context_tuser), + .m_axis_context_tlast (m_axis_context_tlast), + .m_axis_context_tvalid (m_axis_context_tvalid), + .m_axis_context_tready (m_axis_context_tready), + .s_axis_context_tdata (s_axis_context_tdata), + .s_axis_context_tuser (s_axis_context_tuser), + .s_axis_context_tlast (s_axis_context_tlast), + .s_axis_context_tvalid (s_axis_context_tvalid), + .s_axis_context_tready (s_axis_context_tready) + ); + + + // The input packets are the same configuration as the output packets, so + // just use the header information from each incoming packet to create the + // header for each outgoing packet. This is done by connecting + // m_axis_context directly to s_axis_context. 
+ assign s_axis_context_tdata = m_axis_context_tdata; + assign s_axis_context_tuser = m_axis_context_tuser; + assign s_axis_context_tlast = m_axis_context_tlast; + assign s_axis_context_tvalid = m_axis_context_tvalid; + assign m_axis_context_tready = s_axis_context_tready; + + + //--------------------------------------------------------------------------- + // Control Port Address Decoding + //--------------------------------------------------------------------------- + + wire [ NUM_PORTS-1:0] m_ctrlport_req_wr; + wire [ NUM_PORTS-1:0] m_ctrlport_req_rd; + wire [20*NUM_PORTS-1:0] m_ctrlport_req_addr; + wire [32*NUM_PORTS-1:0] m_ctrlport_req_data; + wire [ NUM_PORTS-1:0] m_ctrlport_resp_ack; + wire [32*NUM_PORTS-1:0] m_ctrlport_resp_data; + + ctrlport_decoder #( + .NUM_SLAVES (NUM_PORTS), + .BASE_ADDR (0), + .SLAVE_ADDR_W (FIR_FILTER_ADDR_W) + ) ctrlport_deocder_i ( + .ctrlport_clk (ce_clk), + .ctrlport_rst (ce_rst), + .s_ctrlport_req_wr (ctrlport_reg_req_wr), + .s_ctrlport_req_rd (ctrlport_reg_req_rd), + .s_ctrlport_req_addr (ctrlport_reg_req_addr), + .s_ctrlport_req_data (ctrlport_reg_req_data), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (ctrlport_reg_resp_ack), + .s_ctrlport_resp_status (ctrlport_reg_resp_status), + .s_ctrlport_resp_data (ctrlport_reg_resp_data), + .m_ctrlport_req_wr (m_ctrlport_req_wr), + .m_ctrlport_req_rd (m_ctrlport_req_rd), + .m_ctrlport_req_addr (m_ctrlport_req_addr), + .m_ctrlport_req_data (m_ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack), + .m_ctrlport_resp_status ({NUM_PORTS{2'b0}}), + .m_ctrlport_resp_data (m_ctrlport_resp_data) + ); + + + //--------------------------------------------------------------------------- + // FIR Core Instances + //--------------------------------------------------------------------------- + + genvar i; + for (i = 0; i < 
NUM_PORTS; i = i+1) begin : gen_rfnoc_fir_filter_cores + rfnoc_fir_filter_core #( + .DATA_W (ITEM_W*NIPC), + .COEFF_WIDTH (COEFF_WIDTH), + .NUM_COEFFS (NUM_COEFFS), + .COEFFS_VEC (COEFFS_VEC), + .RELOADABLE_COEFFS (RELOADABLE_COEFFS), + .SYMMETRIC_COEFFS (SYMMETRIC_COEFFS), + .SKIP_ZERO_COEFFS (SKIP_ZERO_COEFFS), + .USE_EMBEDDED_REGS_COEFFS (USE_EMBEDDED_REGS_COEFFS) + ) rfnoc_fir_filter_core_i ( + .clk (ce_clk), + .rst (ce_rst), + .s_ctrlport_req_wr (m_ctrlport_req_wr[i]), + .s_ctrlport_req_rd (m_ctrlport_req_rd[i]), + .s_ctrlport_req_addr (m_ctrlport_req_addr[20*i +: 20]), + .s_ctrlport_req_data (m_ctrlport_req_data[32*i +: 32]), + .s_ctrlport_resp_ack (m_ctrlport_resp_ack[i]), + .s_ctrlport_resp_data (m_ctrlport_resp_data[32*i +: 32]), + .s_axis_tdata (axis_to_fir_tdata[i*(ITEM_W*NIPC) +: (ITEM_W*NIPC)]), + .s_axis_tlast (axis_to_fir_tlast[i]), + .s_axis_tvalid (axis_to_fir_tvalid[i]), + .s_axis_tready (axis_to_fir_tready[i]), + .m_axis_tdata (axis_from_fir_tdata[i*(ITEM_W*NIPC) +: (ITEM_W*NIPC)]), + .m_axis_tlast (axis_from_fir_tlast[i]), + .m_axis_tvalid (axis_from_fir_tvalid[i]), + .m_axis_tready (axis_from_fir_tready[i]) + ); + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter_tb.sv new file mode 100644 index 000000000..28b5493ac --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_block_fir_filter_tb.sv @@ -0,0 +1,524 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_fir_filter_tb +// +// Description: Testbench for rfnoc_block_fir_filter +// + + +module rfnoc_block_fir_filter_tb #( + parameter int NUM_PORTS = 2 +); + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + + 
`include "rfnoc_fir_filter_regs.vh" + + + //--------------------------------------------------------------------------- + // Local Parameters + //--------------------------------------------------------------------------- + + // Simulation parameters + localparam real CHDR_CLK_PER = 6.0; // 166 MHz + localparam real CE_CLK_PER = 5.0; // 200 MHz + localparam int STALL_PROB = 25; // BFM stall probability + + // DUT parameters to test + localparam int CHDR_W = 64; + localparam int THIS_PORTID = 'h123; + localparam int MTU = 8; + // + localparam int NUM_COEFFS = 41; + localparam int COEFF_WIDTH = 16; + localparam int RELOADABLE_COEFFS = 1; + localparam int SYMMETRIC_COEFFS = 1; + localparam int SKIP_ZERO_COEFFS = 1; + localparam int USE_EMBEDDED_REGS_COEFFS = 1; + + localparam logic [COEFF_WIDTH*NUM_COEFFS-1:0] COEFFS_VEC_0 = { + 16'sd158, 16'sd0, 16'sd33, -16'sd0, -16'sd256, + 16'sd553, 16'sd573, -16'sd542, -16'sd1012, 16'sd349, + 16'sd1536, 16'sd123, -16'sd2097, -16'sd1012, 16'sd1633, + 16'sd1608, -16'sd3077, -16'sd5946, 16'sd3370, 16'sd10513, + 16'sd19295, + 16'sd10513, 16'sd3370, -16'sd5946, -16'sd3077, 16'sd1608, + 16'sd1633, -16'sd1012, -16'sd2097, 16'sd123, 16'sd1536, + 16'sd349, -16'sd1012, -16'sd542, 16'sd573, 16'sd553, + -16'sd256, -16'sd0, 16'sd33, 16'sd0, 16'sd158 + }; + + localparam logic [COEFF_WIDTH*NUM_COEFFS-1:0] COEFFS_VEC_1 = { + 16'sd32767, 16'sd0, -16'sd32767, 16'sd0, 16'sd32767, + -16'sd32767, 16'sd32767, -16'sd32767, 16'sd32767, -16'sd32767, + 16'sd32767, 16'sd32767, 16'sd32767, 16'sd32767, 16'sd32767, + -16'sd32767, -16'sd32767, -16'sd32767, -16'sd32767, -16'sd32767, + 16'sd32767, + -16'sd32767, -16'sd32767, -16'sd32767, -16'sd32767, -16'sd32767, + 16'sd32767, 16'sd32767, 16'sd32767, 16'sd32767, 16'sd32767, + -16'sd32767, 16'sd32767, -16'sd32767, 16'sd32767, -16'sd32767, + 16'sd32767, 16'sd0, -16'sd32767, 16'sd0, 16'sd32767 + }; + + //--------------------------------------------------------------------------- + // Clocks + 
//--------------------------------------------------------------------------- + + bit rfnoc_chdr_clk; + bit rfnoc_ctrl_clk; + + sim_clock_gen #(CHDR_CLK_PER) rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst()); + sim_clock_gen #(CHDR_CLK_PER) rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst()); + sim_clock_gen #(CE_CLK_PER) ce_clk_gen (.clk(ce_clk), .rst()); + + + //--------------------------------------------------------------------------- + // Bus Functional Models + //--------------------------------------------------------------------------- + + RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_ctrl_clk); + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(CHDR_W) m_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + AxiStreamIf #(CHDR_W) s_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl = new(backend, m_ctrl, s_ctrl); + + // Connect block controller to BFMs + for (genvar i = 0; i < NUM_PORTS; i++) begin : gen_bfm_connections + initial begin + blk_ctrl.connect_master_data_port(i, m_chdr[i]); + blk_ctrl.connect_slave_data_port(i, s_chdr[i]); + blk_ctrl.set_master_stall_prob(i, STALL_PROB); + blk_ctrl.set_slave_stall_prob(i, STALL_PROB); + end + end + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tready; + + logic [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tready; + + // Map the array of BFMs to a flat vector for the DUT + genvar i; + for (i = 0; i < NUM_PORTS; i++) begin : 
gen_dut_connections + // Connect BFM master to DUT slave port + assign s_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W] = m_chdr[i].tdata; + assign s_rfnoc_chdr_tlast[i] = m_chdr[i].tlast; + assign s_rfnoc_chdr_tvalid[i] = m_chdr[i].tvalid; + assign m_chdr[i].tready = s_rfnoc_chdr_tready[i]; + + // Connect BFM slave to DUT master port + assign s_chdr[i].tdata = m_rfnoc_chdr_tdata[CHDR_W*i+:CHDR_W]; + assign s_chdr[i].tlast = m_rfnoc_chdr_tlast[i]; + assign s_chdr[i].tvalid = m_rfnoc_chdr_tvalid[i]; + assign m_rfnoc_chdr_tready[i] = s_chdr[i].tready; + end + + + rfnoc_block_fir_filter #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NUM_PORTS (NUM_PORTS), + .MTU (MTU), + .COEFF_WIDTH (COEFF_WIDTH), + .NUM_COEFFS (NUM_COEFFS), + .COEFFS_VEC (COEFFS_VEC_0), + .RELOADABLE_COEFFS (RELOADABLE_COEFFS), + .SYMMETRIC_COEFFS (SYMMETRIC_COEFFS), + .SKIP_ZERO_COEFFS (SKIP_ZERO_COEFFS), + .USE_EMBEDDED_REGS_COEFFS (USE_EMBEDDED_REGS_COEFFS) + ) rfnoc_block_fir_filter_i ( + .ce_clk (ce_clk), + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .rfnoc_core_config (backend.cfg), + .rfnoc_core_status (backend.sts), + .rfnoc_ctrl_clk (backend.ctrl_clk), + .s_rfnoc_ctrl_tdata (m_ctrl.tdata), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast), + .s_rfnoc_ctrl_tvalid (m_ctrl.tvalid), + .s_rfnoc_ctrl_tready (m_ctrl.tready), + .m_rfnoc_ctrl_tdata (s_ctrl.tdata), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast), + .m_rfnoc_ctrl_tvalid (s_ctrl.tvalid), + .m_rfnoc_ctrl_tready (s_ctrl.tready) + ); + + + + //--------------------------------------------------------------------------- + // Helper Tasks + //--------------------------------------------------------------------------- + + 
// Translate the desired register access to a ctrlport write request.
+  //
+  //   port  : Index of the filter (data port) whose register bank is accessed
+  //   addr  : Byte offset of the register within that filter's bank
+  //   value : Data word to write
+  //
+  // Each filter owns 2**FIR_FILTER_ADDR_W bytes of ctrlport address space, so
+  // the bank for a given port starts at port * 2**FIR_FILTER_ADDR_W.
+  task automatic write_reg(int port, byte addr, bit [31:0] value);
+    blk_ctrl.reg_write(port * (2**FIR_FILTER_ADDR_W) + addr, value);
+  endtask : write_reg
+
+  // Translate the desired register access to a ctrlport read request.
+  //
+  //   port  : Index of the filter (data port) whose register bank is accessed
+  //   addr  : Byte offset of the register within that filter's bank
+  //   value : Receives the data word that was read
+  //
+  // The register offset must be added to the bank base, exactly as write_reg
+  // does; otherwise every read returns the register at offset 0.
+  task automatic read_reg(int port, byte addr, output logic [31:0] value);
+    blk_ctrl.reg_read(port * (2**FIR_FILTER_ADDR_W) + addr, value);
+  endtask : read_reg
+
+
+
+  //---------------------------------------------------------------------------
+  // Test Process
+  //---------------------------------------------------------------------------
+
+  initial begin : tb_main
+    // Display testbench start message
+    test.start_tb("rfnoc_block_fir_filter_tb");
+
+    // Start the BFMs running
+    blk_ctrl.run();
+
+
+    //-------------------------------------------------------------------------
+    // Reset
+    //-------------------------------------------------------------------------
+
+    test.start_test("Wait for Reset", 10us);
+    fork
+      blk_ctrl.reset_chdr();
+      blk_ctrl.reset_ctrl();
+    join;
+    test.end_test();
+
+
+    //-------------------------------------------------------------------------
+    // Check NoC ID and Block Info
+    //-------------------------------------------------------------------------
+
+    test.start_test("Verify Block Info", 2us);
+    `ASSERT_ERROR(blk_ctrl.get_noc_id() == rfnoc_block_fir_filter_i.NOC_ID, "Incorrect NOC_ID Value");
+    `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect NUM_DATA_I Value");
+    `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect NUM_DATA_O Value");
+    `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect MTU Value");
+    test.end_test();
+
+
+    // Test all ports
+    for (int port = 0; port < NUM_PORTS; port++) begin : port_loop
+
+      //-----------------------------------------------------------------------
+      // Check filter length
+      //-----------------------------------------------------------------------
+
+      begin
+        int num_coeffs,
num_coeffs_to_send;
+
+        test.start_test("Check filter length", 20us);
+
+        // The block must report exactly the number of taps it was built with;
+        // merely being non-zero is not enough.
+        read_reg(port, REG_FIR_NUM_COEFFS, num_coeffs);
+        `ASSERT_ERROR(num_coeffs == NUM_COEFFS, "Incorrect number of coefficients");
+
+        // If using symmetric coefficients, send just first half (rounded up
+        // when the number of coefficients is odd)
+        if (SYMMETRIC_COEFFS) begin
+          num_coeffs_to_send = num_coeffs/2 + num_coeffs[0];
+        end else begin
+          num_coeffs_to_send = num_coeffs;
+        end
+
+        // If using embedded register, coefficients must be preloaded
+        if (USE_EMBEDDED_REGS_COEFFS) begin
+          int i;
+          for (i = 0; i < num_coeffs_to_send-1; i++) begin
+            write_reg(port, REG_FIR_LOAD_COEFF, COEFFS_VEC_0[COEFF_WIDTH*i +: COEFF_WIDTH]);
+          end
+          // i now indexes the final coefficient, which goes to the "last" register
+          write_reg(port, REG_FIR_LOAD_COEFF_LAST, COEFFS_VEC_0[COEFF_WIDTH*i +: COEFF_WIDTH]);
+        end
+
+        test.end_test();
+      end
+
+
+      //-----------------------------------------------------------------------
+      // Test impulse response with default coefficients
+      //-----------------------------------------------------------------------
+      //
+      // Sending an impulse should cause the coefficients to be output.
+      //
+      //-----------------------------------------------------------------------
+
+      begin
+        chdr_word_t send_payload[$];
+        chdr_word_t recv_payload[$];
+        int num_bytes;
+        logic signed [15:0] i_samp, q_samp, i_coeff, q_coeff;
+        string s;
+
+        test.start_test("Test impulse response (default coefficients)", 20us);
+
+        // Generate packet containing an impulse and enqueue it for transfer
+        send_payload = {};
+        send_payload.push_back({16'b0, 16'b0, 16'h7FFF, 16'h7FFF});
+        for (int i = 0; i < NUM_COEFFS/2; i++) begin
+          send_payload.push_back(0);
+        end
+        blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+
+        // Enqueue two packets with zeros to push out the impulse from the
+        // pipeline (one to push out the data and one to overcome some pipeline
+        // registering).
+        send_payload = {};
+        for (int i = 0; i < NUM_COEFFS/2+1; i++) begin
+          send_payload.push_back(0);
+        end
+        for (int n = 0; n < 2; n++) begin
+          blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+        end
+
+        // Receive the result
+        blk_ctrl.recv(port, recv_payload, num_bytes);
+
+        // Check the length of the packet
+        `ASSERT_ERROR(
+          num_bytes == NUM_COEFFS*4,
+          "Received packet didn't have expected length"
+        );
+
+        for (int i = 0; i < NUM_COEFFS; i++) begin
+          // Compute the expected sample
+          i_coeff = $signed(COEFFS_VEC_0[COEFF_WIDTH*i +: COEFF_WIDTH]);
+          q_coeff = i_coeff;
+
+          // Grab the next sample (two 32-bit I/Q samples per payload word;
+          // even samples sit in the lower half, odd samples in the upper half)
+          {i_samp, q_samp} = recv_payload[i/2][i[0]*32 +: 32];
+
+          // Check I / Q values, allowing for an error of +/-1
+          $sformat(
+            s, "Incorrect I value received on sample %0d! Expected: %0d, Received: %0d",
+            i, i_coeff, i_samp);
+          `ASSERT_ERROR(
+            (i_samp == i_coeff) || (i_samp-1 == i_coeff) || (i_samp+1 == i_coeff), s);
+          $sformat(
+            s, "Incorrect Q value received on sample %0d! Expected: %0d, Received: %0d",
+            i, q_coeff, q_samp);
+          `ASSERT_ERROR(
+            (q_samp == q_coeff) || (q_samp-1 == q_coeff) || (q_samp+1 == q_coeff), s);
+        end
+
+        test.end_test();
+      end
+
+
+      //-----------------------------------------------------------------------
+      // Load new coefficients
+      //-----------------------------------------------------------------------
+
+      begin
+        int i;
+        int num_coeffs_to_send;
+
+        // If using symmetric coefficients, send just first half
+        if (SYMMETRIC_COEFFS) begin
+          num_coeffs_to_send = NUM_COEFFS/2 + NUM_COEFFS[0];
+        end else begin
+          num_coeffs_to_send = NUM_COEFFS;
+        end
+
+        test.start_test("Load new coefficients", 20us);
+        for (i = 0; i < num_coeffs_to_send-1; i++) begin
+          write_reg(port, REG_FIR_LOAD_COEFF, COEFFS_VEC_1[COEFF_WIDTH*i +: COEFF_WIDTH]);
+        end
+        write_reg(port, REG_FIR_LOAD_COEFF_LAST, COEFFS_VEC_1[COEFF_WIDTH*i +: COEFF_WIDTH]);
+        test.end_test();
+      end
+
+
+      //-----------------------------------------------------------------------
+      // Test impulse response with new coefficients
+      //-----------------------------------------------------------------------
+      //
+      // Sending an impulse should cause the coefficients to be output.
+      //
+      //-----------------------------------------------------------------------
+
+      begin
+        chdr_word_t send_payload[$];
+        chdr_word_t recv_payload[$];
+        int num_bytes;
+        logic signed [15:0] i_samp, q_samp, i_coeff, q_coeff;
+        string s;
+
+        test.start_test("Test impulse response (loaded coefficients)", 20us);
+
+        // Generate packet containing an impulse and enqueue it for transfer
+        send_payload = {};
+        send_payload.push_back({16'b0, 16'b0, 16'h7FFF, 16'h7FFF});
+        for (int i = 0; i < NUM_COEFFS/2; i++) begin
+          send_payload.push_back(0);
+        end
+        blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+
+        // Enqueue two packets with zeros to push out the impulse from the
+        // pipeline (one to push out the data and one to overcome some pipeline
+        // registering).
+        send_payload = {};
+        for (int i = 0; i < NUM_COEFFS/2+1; i++) begin
+          send_payload.push_back(0);
+        end
+        for (int n = 0; n < 2; n++) begin
+          blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+        end
+
+        // Ignore the first two packets (discard the extra data we put in when
+        // we checked the default coefficients).
+        blk_ctrl.recv(port, recv_payload, num_bytes);
+        blk_ctrl.recv(port, recv_payload, num_bytes);
+
+        // Receive the result
+        blk_ctrl.recv(port, recv_payload, num_bytes);
+
+        // Check the length of the packet
+        `ASSERT_ERROR(
+          num_bytes == NUM_COEFFS*4,
+          "Received packet didn't have expected length"
+        );
+
+        for (int i = 0; i < NUM_COEFFS; i++) begin
+          // Compute the expected sample
+          i_coeff = $signed(COEFFS_VEC_1[COEFF_WIDTH*i +: COEFF_WIDTH]);
+          q_coeff = i_coeff;
+
+          // Grab the next sample
+          {i_samp, q_samp} = recv_payload[i/2][i[0]*32 +: 32];
+
+          // Check I / Q values, allowing for an error of +/-1
+          $sformat(
+            s, "Incorrect I value received on sample %0d! Expected: %0d, Received: %0d",
+            i, i_coeff, i_samp);
+          `ASSERT_ERROR(
+            (i_samp == i_coeff) || (i_samp-1 == i_coeff) || (i_samp+1 == i_coeff), s);
+          $sformat(
+            s, "Incorrect Q value received on sample %0d! Expected: %0d, Received: %0d",
+            i, q_coeff, q_samp);
+          `ASSERT_ERROR(
+            (q_samp == q_coeff) || (q_samp-1 == q_coeff) || (q_samp+1 == q_coeff), s);
+        end
+
+        test.end_test();
+      end
+
+
+      //-----------------------------------------------------------------------
+      // Test step response
+      //-----------------------------------------------------------------------
+
+      begin
+        chdr_word_t send_payload[$];
+        chdr_word_t recv_payload[$];
+        int num_bytes;
+        int coeff_sum;
+        logic signed [15:0] i_samp, q_samp;
+        string s;
+
+        test.start_test("Test step response", 20us);
+
+        // Generate a step function packet
+        send_payload = {};
+        for (int i = 0; i < NUM_COEFFS/2+1; i++) begin
+          send_payload.push_back({16'h7FFF,16'h7FFF,16'h7FFF,16'h7FFF});
+        end
+
+        // Enqueue step function two times, once to fill up the pipeline and
+        // another to get the actual response.
+        for (int n = 0; n < 2; n++) begin
+          blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+        end
+
+        // Enqueue two packets with zeros to push out the step from the
+        // pipeline (one to push out the data and one to overcome some pipeline
+        // registering).
+        send_payload = {};
+        for (int i = 0; i < NUM_COEFFS/2+1; i++) begin
+          send_payload.push_back(0);
+        end
+        for (int n = 0; n < 2; n++) begin
+          blk_ctrl.send(port, send_payload, NUM_COEFFS*4);
+        end
+
+        // Ignore the first three packets: the two left over from the previous
+        // test plus the first step packet, which only fills the pipeline.
+        for (int n = 0; n < 3; n++) begin
+          blk_ctrl.recv(port, recv_payload, num_bytes);
+        end
+
+        // Receive the result
+        blk_ctrl.recv(port, recv_payload, num_bytes);
+
+        // Check the length of the packet
+        `ASSERT_ERROR(
+          num_bytes == NUM_COEFFS*4,
+          "Received packet didn't have expected length"
+        );
+
+        // Calculate sum of all the coefficients
+        coeff_sum = 0;
+        for (int i = 0; i < NUM_COEFFS; i++) begin
+          coeff_sum += $signed(COEFFS_VEC_1[COEFF_WIDTH*i +: COEFF_WIDTH]);
+        end
+
+        for (int i = 0; i < NUM_COEFFS; i++) begin
+          // Grab the next sample
+          {i_samp, q_samp} = recv_payload[i/2][i[0]*32 +: 32];
+
+          // Check I / Q values
+          $sformat(
+            s, "Incorrect I value received on sample %0d! Expected: %0d, Received: %0d",
+            i, coeff_sum, i_samp);
+          `ASSERT_ERROR(
+            (i_samp == coeff_sum) || (i_samp-1 == coeff_sum) || (i_samp+1 == coeff_sum),
+            s
+          );
+          $sformat(
+            s, "Incorrect Q value received on sample %0d! Expected: %0d, Received: %0d",
+            i, coeff_sum, q_samp);
+          `ASSERT_ERROR(
+            (q_samp == coeff_sum) || (q_samp-1 == coeff_sum) || (q_samp+1 == coeff_sum),
+            s
+          );
+        end
+
+        test.end_test();
+      end
+
+    end : port_loop
+
+
+    //-------------------------------------------------------------------------
+    // All done!
+    //-------------------------------------------------------------------------
+
+    test.end_tb(1);
+
+  end : tb_main
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_core.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_core.v
new file mode 100644
index 000000000..774f43761
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_core.v
@@ -0,0 +1,228 @@
+//
+// Copyright 2019 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Description:
+//
+//   Core module for a single instance of an FIR filter, implementing the
+//   registers and signal processing for a single I/Q filter. It assumes the
+//   data stream is an IQ pair with I in the upper DATA_W/2 bits and Q in the
+//   lower DATA_W/2 bits.
+//
+// Parameters:
+//
+//   DATA_W                   : Width of the input/output data stream to
+//                              process.
+//
+//   BASE_ADDR                : Control port base address to which this block
+//                              responds. It is compared against
+//                              s_ctrlport_req_addr[19:FIR_FILTER_ADDR_W], i.e.
+//                              it is expressed in units of
+//                              2^FIR_FILTER_ADDR_W bytes.
+//
+//   COEFF_WIDTH              : Coefficient width
+//
+//   NUM_COEFFS               : Number of coefficients / filter taps
+//
+//   COEFFS_VEC               : Vector of NUM_COEFFS values, each of width
+//                              COEFF_WIDTH, to initialize the filter
+//                              coefficients. Defaults to an impulse.
+//
+//   RELOADABLE_COEFFS        : Enable (1) or disable (0) reloading
+//                              coefficients at runtime
+//
+//   SYMMETRIC_COEFFS         : Reduce multiplier usage by approximately half
+//                              if coefficients are symmetric
+//
+//   SKIP_ZERO_COEFFS         : Reduce multiplier usage by assuming zero valued
+//                              coefficients in COEFFS_VEC are always zero.
+//                              Useful for halfband filters.
+//
+//   USE_EMBEDDED_REGS_COEFFS : Reduce register usage by only using embedded
+//                              registers in DSP slices. Updating taps while
+//                              streaming will cause temporary output
+//                              corruption!
+//
+//   Note: If using USE_EMBEDDED_REGS_COEFFS, coefficients must be written at
+//   least once since COEFFS_VEC is ignored!
+
+
+module rfnoc_fir_filter_core #(
+  parameter DATA_W = 32,
+  parameter [19:0] BASE_ADDR = 0,
+
+  // FIR Filter Parameters
+  parameter COEFF_WIDTH = 16,
+  parameter NUM_COEFFS = 41,
+  parameter [NUM_COEFFS*COEFF_WIDTH-1:0] COEFFS_VEC = // Make impulse by default
+    {
+      {1'b0, {(COEFF_WIDTH-1){1'b1}} }, // Max positive value
+      {(COEFF_WIDTH*(NUM_COEFFS-1)){1'b0}} // Zero for remaining coefficients
+    },
+  parameter RELOADABLE_COEFFS = 1,
+  parameter SYMMETRIC_COEFFS = 0,
+  parameter SKIP_ZERO_COEFFS = 0,
+  parameter USE_EMBEDDED_REGS_COEFFS = 1
+) (
+
+  input wire clk,
+  input wire rst,
+
+  //---------------------------------------------------------------------------
+  // CTRL Port
+  //---------------------------------------------------------------------------
+
+  // Slave (requests in, responses out)
+  input wire s_ctrlport_req_wr,
+  input wire s_ctrlport_req_rd,
+  input wire [19:0] s_ctrlport_req_addr,
+  input wire [31:0] s_ctrlport_req_data,
+  output reg s_ctrlport_resp_ack,
+  output reg [31:0] s_ctrlport_resp_data,
+
+  //---------------------------------------------------------------------------
+  // Data Interface
+  //---------------------------------------------------------------------------
+
+  // Input data stream
+  input wire [DATA_W-1:0] s_axis_tdata,
+  input wire s_axis_tlast,
+  input wire s_axis_tvalid,
+  output wire s_axis_tready,
+
+  // Output data stream
+  output wire [DATA_W-1:0] m_axis_tdata,
+  output wire m_axis_tlast,
+  output wire m_axis_tvalid,
+  input wire m_axis_tready
+);
+
+  // Coefficient reload stream, shared by the I and Q filters
+  reg [COEFF_WIDTH-1:0] m_axis_reload_tdata;
+  reg m_axis_reload_tvalid;
+  reg m_axis_reload_tlast;
+  wire m_axis_reload_tready;
+
+
+  //---------------------------------------------------------------------------
+  // Registers
+  //---------------------------------------------------------------------------
+
+  `include "rfnoc_fir_filter_regs.vh"
+
+  // Separate the address into the block and register portions. Ignore the byte
+  // offset. Each filter owns 2^FIR_FILTER_ADDR_W bytes, so the register offset
+  // is address bits [FIR_FILTER_ADDR_W-1:0] and everything above selects the
+  // filter instance. (Including bit FIR_FILTER_ADDR_W in the register offset
+  // would make every register unreachable for odd-numbered instances.)
+  wire [19:0] block_addr = s_ctrlport_req_addr[19:FIR_FILTER_ADDR_W];
+  wire [19:0] reg_addr = { s_ctrlport_req_addr[FIR_FILTER_ADDR_W-1:2], 2'b0 };
+  wire addr_match = (block_addr == BASE_ADDR);
+
+  always @(posedge clk) begin
+    if (rst) begin
+      s_ctrlport_resp_ack <= 0;
+      m_axis_reload_tvalid <= 0;
+      s_ctrlport_resp_data <= {32{1'bX}};
+      m_axis_reload_tdata <= {COEFF_WIDTH{1'bX}};
+      m_axis_reload_tlast <= 1'bX;
+    end else begin
+      // Default assignments. These must not depend on the request address: the
+      // address is only meaningful while a request is asserted, and the ack
+      // has to drop back to 0 on the following cycle regardless.
+      s_ctrlport_resp_ack <= 0;
+      s_ctrlport_resp_data <= 0;
+
+      // Handle write acknowledgments. Don't ack the register write until it
+      // gets accepted by the FIR filter. This may happen several cycles after
+      // the request, so it is not qualified by the address either.
+      // NOTE(review): only the I filter's reload tready is observed; this
+      // assumes the Q filter accepts reload data in lock step -- confirm.
+      if (m_axis_reload_tvalid && m_axis_reload_tready) begin
+        m_axis_reload_tvalid <= 1'b0;
+        s_ctrlport_resp_ack <= 1'b1;
+      end
+
+      // Handle register writes addressed to this instance
+      if (s_ctrlport_req_wr && addr_match) begin
+        if (reg_addr == REG_FIR_LOAD_COEFF) begin
+          m_axis_reload_tdata <= s_ctrlport_req_data[COEFF_WIDTH-1:0];
+          m_axis_reload_tvalid <= 1'b1;
+          m_axis_reload_tlast <= 1'b0;
+        end else if (reg_addr == REG_FIR_LOAD_COEFF_LAST) begin
+          m_axis_reload_tdata <= s_ctrlport_req_data[COEFF_WIDTH-1:0];
+          m_axis_reload_tvalid <= 1'b1;
+          m_axis_reload_tlast <= 1'b1;
+        end
+      end
+
+      // Handle register reads addressed to this instance
+      if (s_ctrlport_req_rd && addr_match) begin
+        if (reg_addr == REG_FIR_NUM_COEFFS) begin
+          s_ctrlport_resp_data <= NUM_COEFFS;
+          s_ctrlport_resp_ack <= 1;
+        end
+      end
+    end
+  end
+
+
+  //---------------------------------------------------------------------------
+  // FIR Filter Instances
+  //---------------------------------------------------------------------------
+
+  localparam IN_WIDTH = DATA_W/2;
+  localparam OUT_WIDTH = DATA_W/2;
+
+  // I (upper half of the data word). This instance provides the stream
+  // handshake and tlast for both filters.
+  axi_fir_filter #(
+    .IN_WIDTH (IN_WIDTH),
+    .COEFF_WIDTH (COEFF_WIDTH),
+    .OUT_WIDTH (OUT_WIDTH),
+    .NUM_COEFFS (NUM_COEFFS),
+    .COEFFS_VEC (COEFFS_VEC),
+    .RELOADABLE_COEFFS (RELOADABLE_COEFFS),
+    .BLANK_OUTPUT (1),
+    // Optional optimizations
+    .SYMMETRIC_COEFFS (SYMMETRIC_COEFFS),
+    .SKIP_ZERO_COEFFS (SKIP_ZERO_COEFFS),
+    .USE_EMBEDDED_REGS_COEFFS (USE_EMBEDDED_REGS_COEFFS)
+  ) inst_axi_fir_filter_i (
+    .clk (clk),
+    .reset (rst),
+    .clear (1'b0),
+    .s_axis_data_tdata (s_axis_tdata[2*IN_WIDTH-1:IN_WIDTH]),
+    .s_axis_data_tlast (s_axis_tlast),
+    .s_axis_data_tvalid (s_axis_tvalid),
+    .s_axis_data_tready (s_axis_tready),
+    .m_axis_data_tdata (m_axis_tdata[2*OUT_WIDTH-1:OUT_WIDTH]),
+    .m_axis_data_tlast (m_axis_tlast),
+    .m_axis_data_tvalid (m_axis_tvalid),
+    .m_axis_data_tready (m_axis_tready),
+    .s_axis_reload_tdata (m_axis_reload_tdata),
+    .s_axis_reload_tlast (m_axis_reload_tlast),
+    .s_axis_reload_tvalid (m_axis_reload_tvalid),
+    .s_axis_reload_tready (m_axis_reload_tready)
+  );
+
+  // Q (lower half of the data word). Its handshake outputs are left
+  // unconnected; only its data output is used.
+  axi_fir_filter #(
+    .IN_WIDTH (IN_WIDTH),
+    .COEFF_WIDTH (COEFF_WIDTH),
+    .OUT_WIDTH (OUT_WIDTH),
+    .NUM_COEFFS (NUM_COEFFS),
+    .COEFFS_VEC (COEFFS_VEC),
+    .RELOADABLE_COEFFS (RELOADABLE_COEFFS),
+    .BLANK_OUTPUT (1),
+    // Optional optimizations
+    .SYMMETRIC_COEFFS (SYMMETRIC_COEFFS),
+    .SKIP_ZERO_COEFFS (SKIP_ZERO_COEFFS),
+    .USE_EMBEDDED_REGS_COEFFS (USE_EMBEDDED_REGS_COEFFS)
+  ) inst_axi_fir_filter_q (
+    .clk (clk),
+    .reset (rst),
+    .clear (1'b0),
+    .s_axis_data_tdata (s_axis_tdata[IN_WIDTH-1:0]),
+    .s_axis_data_tlast (s_axis_tlast),
+    .s_axis_data_tvalid (s_axis_tvalid),
+    .s_axis_data_tready (),
+    .m_axis_data_tdata (m_axis_tdata[OUT_WIDTH-1:0]),
+    .m_axis_data_tlast (),
+    .m_axis_data_tvalid (),
+    .m_axis_data_tready (m_axis_tready),
+    .s_axis_reload_tdata (m_axis_reload_tdata),
+    .s_axis_reload_tlast (m_axis_reload_tlast),
+    .s_axis_reload_tvalid (m_axis_reload_tvalid),
+    .s_axis_reload_tready ()
+  );
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_regs.vh b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_regs.vh
new file mode 100644
index 000000000..0e070e3a3
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_fir_filter/rfnoc_fir_filter_regs.vh
@@ -0,0 +1,51 @@
+//
+// Copyright 2019 Ettus
Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: fir_filter_regs (Header)
+//
+// Description: Header file for rfnoc_block_fir_filter. All registers are
+//              32-bit words from software's perspective.
+//
+
+// Address space size, per FIR filter. That is, each filter is separated in the
+// CTRL Port address space by 2^FIR_FILTER_ADDR_W bytes.
+localparam FIR_FILTER_ADDR_W = 4;
+
+
+
+// REG_FIR_NUM_COEFFS (R)
+//
+// Contains the number of coefficients for the filter.
+//
+// [31:0] : Returns the number of coefficients (read-only)
+//
+localparam REG_FIR_NUM_COEFFS = 'h0;
+
+
+// REG_FIR_LOAD_COEFF (W)
+//
+// Register for inputting the next coefficient to be loaded into the filter. To
+// load a new set of filter coefficients, write NUM_COEFFS-1 coefficients to
+// this register, then write the last coefficient to REG_FIR_LOAD_COEFF_LAST.
+// The width of each coefficient is set by the COEFF_WIDTH parameter on the
+// block.
+//
+// [31:COEFF_WIDTH]  : Reserved
+// [COEFF_WIDTH-1:0] : The next coefficient to be loaded
+//
+localparam REG_FIR_LOAD_COEFF = 'h4;
+
+
+// REG_FIR_LOAD_COEFF_LAST (W)
+//
+// Register for inputting the last coefficient to be loaded into the filter. To
+// load a new set of filter coefficients, write NUM_COEFFS-1 coefficients to
+// REG_FIR_LOAD_COEFF, then write the last coefficient to this register. The
+// width of each coefficient is set by the COEFF_WIDTH parameter on the block.
+//
+// [31:COEFF_WIDTH]  : Reserved
+// [COEFF_WIDTH-1:0] : The last coefficient to be loaded
+//
+localparam REG_FIR_LOAD_COEFF_LAST = 'h8;
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile
new file mode 100644
index 000000000..30ce14aec
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile
@@ -0,0 +1,45 @@
+#
+# Copyright 2019 Ettus Research, A National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+#-------------------------------------------------
+# Top-of-Makefile
+#-------------------------------------------------
+# Define BASE_DIR to point to the "top" dir
+BASE_DIR = $(abspath ../../../../top)
+# Include viv_sim_preamble after defining BASE_DIR
+include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak
+
+#-------------------------------------------------
+# Design Specific
+#-------------------------------------------------
+# Include makefiles and sources for the DUT and its dependencies
+include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs
+# The Makefile.srcs in this directory appends the block's RTL to RFNOC_OOT_SRCS
+include Makefile.srcs
+
+DESIGN_SRCS += $(abspath \
+$(RFNOC_CORE_SRCS) \
+$(RFNOC_UTIL_SRCS) \
+$(RFNOC_OOT_SRCS) \
+)
+
+#-------------------------------------------------
+# Testbench Specific
+#-------------------------------------------------
+# Name of the top-level testbench module
+SIM_TOP = rfnoc_block_null_src_sink_tb
+
+# Testbench source files
+SIM_SRCS = \
+$(abspath rfnoc_block_null_src_sink_tb.sv) \
+
+# MODELSIM_USER_DO = $(abspath wave.do)
+
+#-------------------------------------------------
+# Bottom-of-Makefile
+#-------------------------------------------------
+# Include all simulator specific makefiles here
+# Each should define a unique target to simulate
+# e.g.
xsim, vsim, etc and a common "clean" target
+include $(BASE_DIR)/../tools/make/viv_simulator.mak
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile.srcs
new file mode 100644
index 000000000..a99bec7db
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/Makefile.srcs
@@ -0,0 +1,12 @@
+#
+# Copyright 2019 Ettus Research, A National Instruments Brand
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+##################################################
+# RFNoC Null Source/Sink Block Sources
+##################################################
+RFNOC_OOT_SRCS += $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_null_src_sink/, \
+rfnoc_block_null_src_sink.v \
+))
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink.v
new file mode 100644
index 000000000..f4f4d7651
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink.v
@@ -0,0 +1,338 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: rfnoc_block_null_src_sink
+// Description:
+//   RFNoC block with two CHDR input ports and two CHDR output ports, built on
+//   noc_shell_generic_ctrlport_pyld_chdr with 32-bit items and NIPC items per
+//   payload line:
+//     * Port 0 input  : null sink. Payload and context are always accepted
+//                       and discarded.
+//     * Port 0 output : null source. While enabled, emits data packets whose
+//                       payload lines carry the low 16 bits of the source line
+//                       counter together with their bitwise inverse.
+//     * Port 1        : loopback. Payload and context received on input 1 are
+//                       wired directly to output 1.
+//   64-bit line and packet counters for the sink, source and loopback paths
+//   are readable through the control port.
+//
+// Parameters:
+//   THIS_PORTID : Passed through to the NoC shell
+//   CHDR_W      : Width of each CHDR port in bits
+//   NIPC        : Number of 32-bit items per payload line
+//   MTU         : Passed through to the NoC shell
+//
+// Signals:
+//   See the grouped comments in the port list below.
+
+module rfnoc_block_null_src_sink #(
+  parameter [9:0] THIS_PORTID = 10'd0,
+  parameter CHDR_W = 64,
+  parameter NIPC = 2,
+  parameter [5:0] MTU = 10
+)(
+  // RFNoC Framework Clocks and Resets
+  input wire rfnoc_chdr_clk,
+  input wire rfnoc_ctrl_clk,
+  // RFNoC Backend Interface
+  input wire [511:0] rfnoc_core_config,
+  output wire [511:0] rfnoc_core_status,
+  // 2 CHDR Input Ports (from framework)
+  input wire [(CHDR_W*2)-1:0] s_rfnoc_chdr_tdata,
+  input wire [1:0] s_rfnoc_chdr_tlast,
+  input wire [1:0] s_rfnoc_chdr_tvalid,
+  output wire [1:0] s_rfnoc_chdr_tready,
+  // 2 CHDR Output Ports (to framework)
+  output wire [(CHDR_W*2)-1:0] m_rfnoc_chdr_tdata,
+  output wire [1:0] m_rfnoc_chdr_tlast,
+  output wire [1:0] m_rfnoc_chdr_tvalid,
+  input wire [1:0] m_rfnoc_chdr_tready,
+  // AXIS-Ctrl Input Port (from framework)
+  input wire [31:0] s_rfnoc_ctrl_tdata,
+  input wire s_rfnoc_ctrl_tlast,
+  input wire s_rfnoc_ctrl_tvalid,
+  output wire s_rfnoc_ctrl_tready,
+  // AXIS-Ctrl Output Port (to framework)
+  output wire [31:0] m_rfnoc_ctrl_tdata,
+  output wire m_rfnoc_ctrl_tlast,
+  output wire m_rfnoc_ctrl_tvalid,
+  input wire m_rfnoc_ctrl_tready
+);
+
+  `include "../../core/rfnoc_chdr_utils.vh"
+
+  // Register map (byte addresses on the control port).
+  //   REG_CTRL_STATUS       : write [1] = source enable, [0] = clear counters;
+  //                           read returns {NIPC[7:0], 8'd32, state, 12'h0,
+  //                           source enable, clear counters}
+  //   REG_SRC_LINES_PER_PKT : 12-bit value loaded into the source line
+  //                           down-counter at the start of each packet
+  //   REG_SRC_BYTES_PER_PKT : 16-bit length field placed in the source header
+  //   REG_SRC_THROTTLE_CYC  : 10-bit idle cycle count between source packets
+  //   REG_*_CNT_LO/HI       : lower/upper 32 bits of the 64-bit counters
+  localparam [19:0] REG_CTRL_STATUS = 20'h00;
+  localparam [19:0] REG_SRC_LINES_PER_PKT = 20'h04;
+  localparam [19:0] REG_SRC_BYTES_PER_PKT = 20'h08;
+  localparam [19:0] REG_SRC_THROTTLE_CYC = 20'h0C;
+  localparam [19:0] REG_SNK_LINE_CNT_LO = 20'h10;
+  localparam [19:0] REG_SNK_LINE_CNT_HI = 20'h14;
+  localparam [19:0] REG_SNK_PKT_CNT_LO = 20'h18;
+  localparam [19:0] REG_SNK_PKT_CNT_HI = 20'h1C;
+  localparam [19:0] REG_SRC_LINE_CNT_LO = 20'h20;
+  localparam [19:0] REG_SRC_LINE_CNT_HI = 20'h24;
+  localparam [19:0] REG_SRC_PKT_CNT_LO = 20'h28;
+  localparam [19:0] REG_SRC_PKT_CNT_HI = 20'h2C;
+  localparam [19:0] REG_LOOP_LINE_CNT_LO = 20'h30;
+  localparam [19:0] REG_LOOP_LINE_CNT_HI = 20'h34;
+  localparam [19:0] REG_LOOP_PKT_CNT_LO = 20'h38;
+  localparam [19:0] REG_LOOP_PKT_CNT_HI = 20'h3C;
+
+  wire rfnoc_chdr_rst;
+  wire rfnoc_ctrl_rst;
+
+  wire ctrlport_req_wr;
+  wire ctrlport_req_rd;
+  wire [19:0] ctrlport_req_addr;
+  wire [31:0] ctrlport_req_data;
+  reg ctrlport_resp_ack;
+  reg [31:0] ctrlport_resp_data;
+
+  // Payload streams: src = generated data, snk = discarded data,
+  // loop = data passed from input 1 to output 1
+  wire [(32*NIPC)-1:0] src_pyld_tdata , snk_pyld_tdata , loop_pyld_tdata ;
+  wire [NIPC-1:0] src_pyld_tkeep , snk_pyld_tkeep , loop_pyld_tkeep ;
+  wire src_pyld_tlast , snk_pyld_tlast , loop_pyld_tlast ;
+  wire src_pyld_tvalid, snk_pyld_tvalid, loop_pyld_tvalid;
+  wire src_pyld_tready, snk_pyld_tready, loop_pyld_tready;
+
+  // Context (header) streams, same naming as the payload streams
+  wire [CHDR_W-1:0] src_ctxt_tdata , snk_ctxt_tdata , loop_ctxt_tdata ;
+  wire [3:0] src_ctxt_tuser , snk_ctxt_tuser , loop_ctxt_tuser ;
+  wire src_ctxt_tlast , snk_ctxt_tlast , loop_ctxt_tlast ;
+  wire src_ctxt_tvalid, snk_ctxt_tvalid, loop_ctxt_tvalid;
+  wire src_ctxt_tready, snk_ctxt_tready, loop_ctxt_tready;
+
+  // NoC Shell
+  // ---------------------------
+  // The slave ctrlport is disabled (CTRLPORT_SLV_EN = 0) and its inputs are
+  // tied off. In the payload/context concatenations the loop_* signals form
+  // the upper (port 1) slice and the snk_*/src_* signals the lower (port 0)
+  // slice.
+  noc_shell_generic_ctrlport_pyld_chdr #(
+    .NOC_ID (32'h0000_0001),
+    .THIS_PORTID (THIS_PORTID),
+    .CHDR_W (CHDR_W),
+    .CTRL_FIFOSIZE (5),
+    .CTRLPORT_SLV_EN (0),
+    .NUM_DATA_I (2),
+    .NUM_DATA_O (2),
+    .ITEM_W (32),
+    .NIPC (NIPC),
+    .MTU (MTU),
+    .CTXT_FIFOSIZE (1),
+    .PYLD_FIFOSIZE (1)
+  ) noc_shell_i (
+    .rfnoc_chdr_clk (rfnoc_chdr_clk ),
+    .rfnoc_chdr_rst (rfnoc_chdr_rst ),
+    .rfnoc_ctrl_clk (rfnoc_ctrl_clk ),
+    .rfnoc_ctrl_rst (rfnoc_ctrl_rst ),
+    .rfnoc_core_config (rfnoc_core_config ),
+    .rfnoc_core_status (rfnoc_core_status ),
+    .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata ),
+    .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast ),
+    .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid ),
+    .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready ),
+    .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata ),
+    .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast ),
+    .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid ),
+    .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready ),
+    .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ),
+    .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ),
+    .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ),
+    .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ),
+    .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ),
+    .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ),
+    .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ),
+    .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ),
+    .m_ctrlport_req_wr (ctrlport_req_wr ),
+    .m_ctrlport_req_rd (ctrlport_req_rd ),
+    .m_ctrlport_req_addr (ctrlport_req_addr ),
+    .m_ctrlport_req_data (ctrlport_req_data ),
+    .m_ctrlport_req_byte_en ( ),
+    .m_ctrlport_req_has_time ( ),
+    .m_ctrlport_req_time ( ),
+    .m_ctrlport_resp_ack (ctrlport_resp_ack ),
+    .m_ctrlport_resp_status (2'd0 ),
+    .m_ctrlport_resp_data (ctrlport_resp_data ),
+    .s_ctrlport_req_wr ('h0 ),
+    .s_ctrlport_req_rd ('h0 ),
+    .s_ctrlport_req_addr ('h0 ),
+    .s_ctrlport_req_portid ('h0 ),
+    .s_ctrlport_req_rem_epid ('h0 ),
+    .s_ctrlport_req_rem_portid('h0 ),
+    .s_ctrlport_req_data ('h0 ),
+    .s_ctrlport_req_byte_en ('h0 ),
+    .s_ctrlport_req_has_time ('h0 ),
+    .s_ctrlport_req_time ('h0 ),
+    .s_ctrlport_resp_ack ( ),
+    .s_ctrlport_resp_status ( ),
+    .s_ctrlport_resp_data ( ),
+    .m_axis_payload_tdata ({loop_pyld_tdata , snk_pyld_tdata }),
+    .m_axis_payload_tkeep ({loop_pyld_tkeep , snk_pyld_tkeep }),
+    .m_axis_payload_tlast ({loop_pyld_tlast , snk_pyld_tlast }),
+    .m_axis_payload_tvalid ({loop_pyld_tvalid, snk_pyld_tvalid}),
+    .m_axis_payload_tready ({loop_pyld_tready, snk_pyld_tready}),
+    .m_axis_context_tdata ({loop_ctxt_tdata , snk_ctxt_tdata }),
+    .m_axis_context_tuser ({loop_ctxt_tuser , snk_ctxt_tuser }),
+    .m_axis_context_tlast ({loop_ctxt_tlast , snk_ctxt_tlast }),
+    .m_axis_context_tvalid ({loop_ctxt_tvalid, snk_ctxt_tvalid}),
+    .m_axis_context_tready ({loop_ctxt_tready, snk_ctxt_tready}),
+    .s_axis_payload_tdata ({loop_pyld_tdata , src_pyld_tdata }),
+    .s_axis_payload_tkeep ({loop_pyld_tkeep , src_pyld_tkeep }),
+    .s_axis_payload_tlast ({loop_pyld_tlast , src_pyld_tlast }),
+    .s_axis_payload_tvalid ({loop_pyld_tvalid, src_pyld_tvalid}),
+    .s_axis_payload_tready ({loop_pyld_tready, src_pyld_tready}),
+    .s_axis_context_tdata ({loop_ctxt_tdata , src_ctxt_tdata }),
+    .s_axis_context_tuser ({loop_ctxt_tuser , src_ctxt_tuser }),
+    .s_axis_context_tlast ({loop_ctxt_tlast , src_ctxt_tlast }),
+    .s_axis_context_tvalid ({loop_ctxt_tvalid, src_ctxt_tvalid}),
+    .s_axis_context_tready ({loop_ctxt_tready, src_ctxt_tready})
+  );
+
+  // Packet Counters
+  // ---------------------------
+  // A "line" is one accepted payload transfer; a packet is counted on the
+  // transfer that carries tlast. reg_clear_cnts is a plain register bit (it
+  // is only changed by a register write), so the counters are held at zero
+  // for as long as it stays set.
+  reg reg_clear_cnts = 1'b0;
+  reg [63:0] snk_line_cnt = 64'd0, snk_pkt_cnt = 64'd0;
+  reg [63:0] src_line_cnt = 64'd0, src_pkt_cnt = 64'd0;
+  reg [63:0] loop_line_cnt = 64'd0, loop_pkt_cnt = 64'd0;
+
+  always @(posedge rfnoc_chdr_clk) begin
+    if (rfnoc_chdr_rst | reg_clear_cnts) begin
+      snk_line_cnt <= 64'd0;
+      snk_pkt_cnt <= 64'd0;
+      src_line_cnt <= 64'd0;
+      src_pkt_cnt <= 64'd0;
+      loop_line_cnt <= 64'd0;
+      loop_pkt_cnt <= 64'd0;
+    end else begin
+      if (snk_pyld_tvalid & snk_pyld_tready) begin
+        snk_line_cnt <= snk_line_cnt + 1;
+        if (snk_pyld_tlast)
+          snk_pkt_cnt <= snk_pkt_cnt + 1;
+      end
+      if (src_pyld_tvalid & src_pyld_tready) begin
+        src_line_cnt <= src_line_cnt + 1;
+        if (src_pyld_tlast)
+          src_pkt_cnt <= src_pkt_cnt + 1;
+      end
+      if (loop_pyld_tvalid & loop_pyld_tready) begin
+        loop_line_cnt <= loop_line_cnt + 1;
+        if (loop_pyld_tlast)
+          loop_pkt_cnt <= loop_pkt_cnt + 1;
+      end
+    end
+  end
+
+  // NULL Sink
+  // ---------------------------
+  // Always ready: incoming payload and context are accepted and dropped.
+  assign snk_pyld_tready = 1'b1;
+  assign snk_ctxt_tready = 1'b1;
+
+  // NULL Source
+  // ---------------------------
+  reg reg_src_en = 1'b0;
+  reg [11:0] reg_src_lpp = 12'd0;
+  reg [15:0] reg_src_bpp = 16'd0;
+  reg [9:0] reg_throttle_cyc = 10'd0;
+
+  // Source state machine: send the header, then the payload lines, then
+  // optionally idle for the throttle period before the next header.
+  localparam [1:0] ST_HDR = 2'd0;
+  localparam [1:0] ST_PYLD = 2'd1;
+  localparam [1:0] ST_WAIT = 2'd2;
+
+  reg [1:0] state = ST_HDR;
+  reg [11:0] lines_left = 12'd0;
+  reg [9:0] throttle_cntr = 10'd0;
+
+  always @(posedge rfnoc_chdr_clk) begin
+    if (rfnoc_chdr_rst) begin
+      state <= ST_HDR;
+    end else begin
+      case (state)
+        ST_HDR: begin
+          // Header accepted: load the line down-counter. tlast is asserted
+          // when it reaches zero, so a packet carries reg_src_lpp + 1 lines.
+          if (src_ctxt_tvalid && src_ctxt_tready) begin
+            state <= ST_PYLD;
+            lines_left <= reg_src_lpp;
+          end
+        end
+        ST_PYLD: begin
+          if (src_pyld_tvalid && src_pyld_tready) begin
+            if (src_pyld_tlast) begin
+              if (reg_throttle_cyc == 10'd0) begin
+                state <= ST_HDR;
+              end else begin
+                state <= ST_WAIT;
+                throttle_cntr <= reg_throttle_cyc;
+              end
+            end else begin
+              lines_left <= lines_left - 12'd1;
+            end
+          end
+        end
+        ST_WAIT: begin
+          if (throttle_cntr == 10'd0)
+            state <= ST_HDR;
+          else
+            throttle_cntr <= throttle_cntr - 10'd1;
+        end
+        default: begin
+          state <= ST_HDR;
+        end
+      endcase
+    end
+  end
+
+  // Payload pattern: each 32-bit item is {~count[15:0], count[15:0]} of the
+  // source line counter, replicated NIPC times per line.
+  assign src_pyld_tdata = {NIPC{{~src_line_cnt[15:0], src_line_cnt[15:0]}}};
+  assign src_pyld_tkeep = {NIPC{1'b1}};
+  assign src_pyld_tlast = (lines_left == 12'd0);
+  assign src_pyld_tvalid = (state == ST_PYLD);
+
+  // Header: the low 16 bits of the source packet counter and reg_src_bpp are
+  // passed as arguments (presumably sequence number and length -- confirm
+  // against chdr_build_header in rfnoc_chdr_utils.vh).
+  assign src_ctxt_tdata = chdr_build_header(
+    6'd0, 1'b0, 1'b0, CHDR_PKT_TYPE_DATA, CHDR_NO_MDATA, src_pkt_cnt[15:0], reg_src_bpp, 16'd0);
+  assign src_ctxt_tuser = CONTEXT_FIELD_HDR;
+  assign src_ctxt_tlast = 1'b1;
+  assign src_ctxt_tvalid = (state == ST_HDR && reg_src_en);
+
+
+  // Register Interface
+  // ---------------------------
+  always @(posedge rfnoc_chdr_clk) begin
+    if (rfnoc_chdr_rst) begin
+      ctrlport_resp_ack <= 1'b0;
+    end else begin
+      // All transactions finish in 1 cycle
+      ctrlport_resp_ack <= ctrlport_req_wr | ctrlport_req_rd;
+      // Handle register writes
+      if (ctrlport_req_wr) begin
+        case(ctrlport_req_addr)
+          REG_CTRL_STATUS:
+            {reg_src_en, reg_clear_cnts} <= ctrlport_req_data[1:0];
+          REG_SRC_LINES_PER_PKT:
+            reg_src_lpp <= ctrlport_req_data[11:0];
+          REG_SRC_BYTES_PER_PKT:
+            reg_src_bpp <= ctrlport_req_data[15:0];
+          REG_SRC_THROTTLE_CYC:
+            reg_throttle_cyc <= ctrlport_req_data[9:0];
+        endcase
+      end
+      // Handle register reads
+      if (ctrlport_req_rd) begin
+        case(ctrlport_req_addr)
+          REG_CTRL_STATUS:
+            ctrlport_resp_data <= {NIPC[7:0], 8'd32, state, 12'h0, reg_src_en, reg_clear_cnts};
+          REG_SRC_LINES_PER_PKT:
+            ctrlport_resp_data <= {20'h0, reg_src_lpp};
+          REG_SRC_BYTES_PER_PKT:
+            ctrlport_resp_data <= {16'h0, reg_src_bpp};
+          REG_SRC_THROTTLE_CYC:
+            ctrlport_resp_data <= {22'h0, reg_throttle_cyc};
+          REG_SNK_LINE_CNT_LO:
+            ctrlport_resp_data <= snk_line_cnt[31:0];
+          REG_SNK_LINE_CNT_HI:
+            ctrlport_resp_data <= snk_line_cnt[63:32];
+          REG_SNK_PKT_CNT_LO:
+            ctrlport_resp_data <= snk_pkt_cnt[31:0];
+          REG_SNK_PKT_CNT_HI:
+            ctrlport_resp_data <= snk_pkt_cnt[63:32];
+          REG_SRC_LINE_CNT_LO:
+            ctrlport_resp_data <= src_line_cnt[31:0];
+          REG_SRC_LINE_CNT_HI:
+            ctrlport_resp_data <= src_line_cnt[63:32];
+          REG_SRC_PKT_CNT_LO:
+            ctrlport_resp_data <= src_pkt_cnt[31:0];
+          REG_SRC_PKT_CNT_HI:
+            ctrlport_resp_data <= src_pkt_cnt[63:32];
+          REG_LOOP_LINE_CNT_LO:
+            ctrlport_resp_data <= loop_line_cnt[31:0];
+          REG_LOOP_LINE_CNT_HI:
+            ctrlport_resp_data <= loop_line_cnt[63:32];
+          REG_LOOP_PKT_CNT_LO:
+            ctrlport_resp_data <= loop_pkt_cnt[31:0];
+          REG_LOOP_PKT_CNT_HI:
+            ctrlport_resp_data <= loop_pkt_cnt[63:32];
+          default:
+            ctrlport_resp_data <= 32'h0;
+        endcase
+      end
+    end
+  end
+
+endmodule // rfnoc_block_null_src_sink
diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink_tb.sv
new file mode 100644
index 000000000..f25e762b3
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_null_src_sink/rfnoc_block_null_src_sink_tb.sv
@@ -0,0 +1,268 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: rfnoc_block_null_src_sink_tb
+//
+
+`default_nettype none
+
+
+module rfnoc_block_null_src_sink_tb;
+
+  // Include macros and time declarations for use with PkgTestExec
+  `include "test_exec.svh"
+
+  import PkgTestExec::*;
+  import PkgChdrUtils::*;
+  import PkgRfnocBlockCtrlBfm::*;
+  import PkgRfnocItemUtils::*;
+
+  // Parameters
+  localparam [9:0] THIS_PORTID = 10'h17;
+  localparam [15:0] THIS_EPID = 16'hDEAD;
+  localparam int CHDR_W = 64;
+  localparam int SPP = 201;
+  localparam int LPP = ((SPP+1)/2);
+  localparam int NUM_PKTS = 50;
+
+  localparam int PORT_SRCSNK = 0;
+  localparam int PORT_LOOP = 1;
+
+  // Clock and Reset Definition
+  bit rfnoc_chdr_clk;
+  sim_clock_gen #(2.5) rfnoc_chdr_clk_gen (rfnoc_chdr_clk); // 400 MHz
+
+  // ----------------------------------------
+  // Instantiate DUT
+  // ----------------------------------------
+
+  // Connections to DUT as interfaces:
+  RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_chdr_clk); // Required backend iface
+  AxiStreamIf #(32) m_ctrl (rfnoc_chdr_clk); // Required control iface
+  AxiStreamIf #(32) s_ctrl (rfnoc_chdr_clk); // Required control iface
+  AxiStreamIf #(CHDR_W) m0_chdr (rfnoc_chdr_clk); // Optional data
iface + AxiStreamIf #(CHDR_W) m1_chdr (rfnoc_chdr_clk); // Optional data iface + AxiStreamIf #(CHDR_W) s0_chdr (rfnoc_chdr_clk); // Optional data iface + AxiStreamIf #(CHDR_W) s1_chdr (rfnoc_chdr_clk); // Optional data iface + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl; + + // DUT + rfnoc_block_null_src_sink #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NIPC (2), + .MTU (10) + ) dut ( + .rfnoc_chdr_clk (backend.chdr_clk), + .rfnoc_ctrl_clk (backend.ctrl_clk), + .rfnoc_core_config (backend.slave.cfg), + .rfnoc_core_status (backend.slave.sts), + .s_rfnoc_chdr_tdata ({m1_chdr.slave.tdata , m0_chdr.slave.tdata }), + .s_rfnoc_chdr_tlast ({m1_chdr.slave.tlast , m0_chdr.slave.tlast }), + .s_rfnoc_chdr_tvalid({m1_chdr.slave.tvalid , m0_chdr.slave.tvalid }), + .s_rfnoc_chdr_tready({m1_chdr.slave.tready , m0_chdr.slave.tready }), + .m_rfnoc_chdr_tdata ({s1_chdr.master.tdata , s0_chdr.master.tdata }), + .m_rfnoc_chdr_tlast ({s1_chdr.master.tlast , s0_chdr.master.tlast }), + .m_rfnoc_chdr_tvalid({s1_chdr.master.tvalid, s0_chdr.master.tvalid}), + .m_rfnoc_chdr_tready({s1_chdr.master.tready, s0_chdr.master.tready}), + .s_rfnoc_ctrl_tdata (m_ctrl.slave.tdata ), + .s_rfnoc_ctrl_tlast (m_ctrl.slave.tlast ), + .s_rfnoc_ctrl_tvalid(m_ctrl.slave.tvalid ), + .s_rfnoc_ctrl_tready(m_ctrl.slave.tready ), + .m_rfnoc_ctrl_tdata (s_ctrl.master.tdata ), + .m_rfnoc_ctrl_tlast (s_ctrl.master.tlast ), + .m_rfnoc_ctrl_tvalid(s_ctrl.master.tvalid), + .m_rfnoc_ctrl_tready(s_ctrl.master.tready) + ); + + // ---------------------------------------- + // Test Process + // ---------------------------------------- + + initial begin + // Shared Variables + // ---------------------------------------- + timeout_t timeout; + ctrl_word_t rvalue = 0; + + // Initialize + // ---------------------------------------- + test.start_tb("rfnoc_block_null_src_sink_tb"); + + // Start the stream endpoint BFM + blk_ctrl = new(backend, m_ctrl, 
s_ctrl); + blk_ctrl.add_master_data_port(m0_chdr); + blk_ctrl.add_slave_data_port(s0_chdr); + blk_ctrl.add_master_data_port(m1_chdr); + blk_ctrl.add_slave_data_port(s1_chdr); + blk_ctrl.run(); + + // Startup block (Software initialization) + // ---------------------------------------- + test.start_test("Flush block then reset it"); + begin + test.start_timeout(timeout, 10us, "Waiting for flush_and_reset"); + #100; //Wait for GSR to deassert + blk_ctrl.flush_and_reset(); + test.end_timeout(timeout); + end + test.end_test(); + + // Run Tests + // ---------------------------------------- + test.start_test("Read Block Info"); + begin + test.start_timeout(timeout, 1us, "Waiting for block info response"); + // Get static block info and validate it + `ASSERT_ERROR(blk_ctrl.get_noc_id() == 1, "Incorrect noc_id Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_i() == 2, "Incorrect num_data_i Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_o() == 2, "Incorrect num_data_o Value"); + `ASSERT_ERROR(blk_ctrl.get_ctrl_fifosize() == 5, "Incorrect ctrl_fifosize Value"); + `ASSERT_ERROR(blk_ctrl.get_mtu() == 10, "Incorrect mtu Value"); + + // Read status register and validate it + blk_ctrl.reg_read(dut.REG_CTRL_STATUS, rvalue); + `ASSERT_ERROR(rvalue[31:24] == 2, "Incorrect NIPC Value"); + `ASSERT_ERROR(rvalue[23:16] == 32, "Incorrect ITEM_W Value"); + test.end_timeout(timeout); + end + test.end_test(); + + test.start_test("Stream Data Through Loopback Port"); + begin + // Send and receive packets + repeat (NUM_PKTS) begin + chdr_word_t rx_data[$]; + int rx_bytes; + automatic ItemDataBuff #(logic[31:0]) tx_dbuff = new, rx_dbuff = new; + for (int i = 0; i < SPP; i++) + tx_dbuff.put($urandom()); + test.start_timeout(timeout, 5us, "Waiting for pkt to loop back"); + blk_ctrl.send(PORT_LOOP, tx_dbuff.to_chdr_payload(), tx_dbuff.get_bytes()); + blk_ctrl.recv(PORT_LOOP, rx_data, rx_bytes); + rx_dbuff.from_chdr_payload(rx_data, rx_bytes); + `ASSERT_ERROR(rx_dbuff.equal(tx_dbuff), "Data 
mismatch");
        test.end_timeout(timeout);
      end

      // Loopback counters: every packet sent above went through the loopback
      // port, so LPP lines per packet and NUM_PKTS packets are expected.
      blk_ctrl.reg_read(dut.REG_LOOP_LINE_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == (LPP*NUM_PKTS), "Incorrect REG_LOOP_LINE_CNT_LO value");
      blk_ctrl.reg_read(dut.REG_LOOP_PKT_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == NUM_PKTS, "Incorrect REG_LOOP_PKT_CNT_LO value");

      // Source counters: the source has not been enabled yet, expect zero
      blk_ctrl.reg_read(dut.REG_SRC_LINE_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_LINE_CNT_LO value");
      blk_ctrl.reg_read(dut.REG_SRC_PKT_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_PKT_CNT_LO value");

      // Sink counters: nothing has been sent to the sink port yet, expect zero
      blk_ctrl.reg_read(dut.REG_SNK_LINE_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SNK_LINE_CNT_LO value");
      blk_ctrl.reg_read(dut.REG_SNK_PKT_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SNK_PKT_CNT_LO value");
    end
    test.end_test();

    test.start_test("Stream Data To Sink Port");
    begin
      // Send NUM_PKTS packets of SPP random 32-bit items to the sink port.
      // Nothing comes back on this port, so no receive buffers are needed.
      repeat (NUM_PKTS) begin
        automatic ItemDataBuff #(logic[31:0]) tx_dbuff = new;
        for (int i = 0; i < SPP; i++)
          tx_dbuff.put($urandom());
        test.start_timeout(timeout, 5us, "Waiting for pkt to be sent to sink");
        blk_ctrl.send(PORT_SRCSNK, tx_dbuff.to_chdr_payload(), tx_dbuff.get_bytes());
        test.end_timeout(timeout);
      end
      // Idle before reading the counters. Presumably this gives the queued
      // packets time to reach the DUT's sink -- TODO confirm send() semantics.
      repeat (NUM_PKTS * SPP * 2) @(posedge rfnoc_chdr_clk);

      // Loopback counters must be unchanged from the previous test
      blk_ctrl.reg_read(dut.REG_LOOP_LINE_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == (LPP*NUM_PKTS), "Incorrect REG_LOOP_LINE_CNT_LO value");
      blk_ctrl.reg_read(dut.REG_LOOP_PKT_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == NUM_PKTS, "Incorrect REG_LOOP_PKT_CNT_LO value");

      // Source counters: the source is still disabled, expect zero
      blk_ctrl.reg_read(dut.REG_SRC_LINE_CNT_LO, rvalue);
      `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_LINE_CNT_LO value");
blk_ctrl.reg_read(dut.REG_SRC_PKT_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_PKT_CNT_LO value"); + + // Read item and packet counts on sink port + blk_ctrl.reg_read(dut.REG_SNK_LINE_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == (LPP*NUM_PKTS), "Incorrect REG_SNK_LINE_CNT_LO value"); + blk_ctrl.reg_read(dut.REG_SNK_PKT_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == NUM_PKTS, "Incorrect REG_SNK_PKT_CNT_LO value"); + end + test.end_test(); + + test.start_test("Stream Data From Source Port"); + begin + // Turn on the source for some time then stop it + blk_ctrl.reg_write(dut.REG_SRC_LINES_PER_PKT, LPP-1); + blk_ctrl.reg_write(dut.REG_SRC_BYTES_PER_PKT, (LPP+1)*8); + blk_ctrl.reg_write(dut.REG_CTRL_STATUS, 2'b10); + repeat ((NUM_PKTS / 10) * LPP) @(posedge rfnoc_chdr_clk); + blk_ctrl.reg_write(dut.REG_CTRL_STATUS, 2'b00); + blk_ctrl.reg_read(dut.REG_SRC_PKT_CNT_LO, rvalue); + repeat (rvalue * LPP * 2) @(posedge rfnoc_chdr_clk); + blk_ctrl.reg_read(dut.REG_SRC_PKT_CNT_LO, rvalue); + + // Gather the accumulated packets and verify contents + for (int p = 0; p < rvalue; p++) begin + chdr_word_t exp_data[$]; + chdr_word_t rx_data[$]; + int rx_bytes; + test.start_timeout(timeout, 5us, "Waiting for pkt to arrive"); + exp_data.delete(); + for (int i = p*LPP; i < (p+1)*LPP; i++) + exp_data.push_back({~i[15:0], i[15:0], ~i[15:0], i[15:0]}); + blk_ctrl.recv(PORT_SRCSNK, rx_data, rx_bytes); + `ASSERT_ERROR(blk_ctrl.compare_data(exp_data, rx_data), "Data mismatch"); + test.end_timeout(timeout); + end + end + test.end_test(); + + test.start_test("Clear Counts"); + begin + test.start_timeout(timeout, 1us, "Waiting for clear and readbacks"); + // Clear + blk_ctrl.reg_write(dut.REG_CTRL_STATUS, 2'b01); + + // Read item and packet counts on loopback port + blk_ctrl.reg_read(dut.REG_LOOP_LINE_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_LOOP_LINE_CNT_LO value"); + blk_ctrl.reg_read(dut.REG_LOOP_PKT_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect 
REG_LOOP_PKT_CNT_LO value"); + + // Read item and packet counts on source port + blk_ctrl.reg_read(dut.REG_SRC_LINE_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_LINE_CNT_LO value"); + blk_ctrl.reg_read(dut.REG_SRC_PKT_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SRC_PKT_CNT_LO value"); + + // Read item and packet counts on sink port + blk_ctrl.reg_read(dut.REG_SNK_LINE_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SNK_LINE_CNT_LO value"); + blk_ctrl.reg_read(dut.REG_SNK_PKT_CNT_LO, rvalue); + `ASSERT_ERROR(rvalue == 0, "Incorrect REG_SNK_PKT_CNT_LO value"); + test.end_timeout(timeout); + end + test.end_test(); + + // Finish Up + // ---------------------------------------- + // Display final statistics and results + test.end_tb(); + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile new file mode 100644 index 000000000..63d6f1851 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile @@ -0,0 +1,47 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/utils/Makefile.srcs +include Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_UTIL_SRCS) \ +$(RFNOC_BLOCK_RADIO_SRCS) \ +) + +#------------------------------------------------- +# 
Testbench Specific +#------------------------------------------------- +SIM_TOP = rfnoc_block_radio_all_tb + +SIM_SRCS = \ +$(abspath sim_radio_gen.sv) \ +$(abspath rfnoc_block_radio_tb.sv) \ +$(abspath rfnoc_block_radio_all_tb.sv) + +# MODELSIM_USER_DO = $(abspath wave.do) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile.srcs b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile.srcs new file mode 100644 index 000000000..84dd01541 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/Makefile.srcs @@ -0,0 +1,20 @@ +# +# Copyright 2018 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Utility Sources +################################################## +RFNOC_BLOCK_RADIO_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/blocks/rfnoc_block_radio/, \ +rfnoc_block_radio_regs.vh \ +radio_rx_core.v \ +radio_tx_core.v \ +radio_core.v \ +noc_shell_radio.v \ +rfnoc_block_radio.v \ +rx_frontend_gen3.v \ +tx_frontend_gen3.v \ +quarter_rate_downconverter.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/noc_shell_radio.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/noc_shell_radio.v new file mode 100644 index 000000000..32ab32b63 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/noc_shell_radio.v @@ -0,0 +1,290 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: noc_shell_radio +// +// Description: A NoC Shell for RFNoC. 
This should eventually be replaced +// by an auto-generated NoC Shell. +// + +module noc_shell_radio #( + parameter [31:0] NOC_ID = 32'h0, + parameter [ 9:0] THIS_PORTID = 10'd0, + parameter CHDR_W = 64, + parameter [ 0:0] CTRLPORT_SLV_EN = 1, + parameter [ 0:0] CTRLPORT_MST_EN = 1, + parameter [ 5:0] CTRL_FIFO_SIZE = 9, + parameter [ 5:0] NUM_DATA_I = 1, + parameter [ 5:0] NUM_DATA_O = 1, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter PYLD_FIFO_SIZE = 10, + parameter MTU = 10 +)( + //--------------------------------------------------------------------------- + // Framework Interface + //--------------------------------------------------------------------------- + + // RFNoC Framework Clocks and Resets + input wire rfnoc_chdr_clk, + output wire rfnoc_chdr_rst, + input wire rfnoc_ctrl_clk, + output wire rfnoc_ctrl_rst, + // RFNoC Backend Interface + input wire [ 511:0] rfnoc_core_config, + output wire [ 511:0] rfnoc_core_status, + // CHDR Input Ports (from framework) + input wire [(CHDR_W*NUM_DATA_I)-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_DATA_I-1:0] s_rfnoc_chdr_tready, + // CHDR Output Ports (to framework) + output wire [(CHDR_W*NUM_DATA_O)-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_DATA_O-1:0] m_rfnoc_chdr_tready, + // AXIS-Ctrl Input Port (from framework) + input wire [ 31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + // AXIS-Ctrl Output Port (to framework) + output wire [ 31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + //--------------------------------------------------------------------------- + // Client Control Port Interface + 
//--------------------------------------------------------------------------- + + // Clock + input wire ctrlport_clk, + input wire ctrlport_rst, + // Master + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Client Data Interface + //--------------------------------------------------------------------------- + + // Clock + input wire axis_data_clk, + input wire axis_data_rst, + + // Output data stream (to user logic) + output wire [(NUM_DATA_I*ITEM_W*NIPC)-1:0] m_axis_tdata, + output wire [ (NUM_DATA_I*NIPC)-1:0] m_axis_tkeep, + output wire [ NUM_DATA_I-1:0] m_axis_tlast, + output wire [ NUM_DATA_I-1:0] m_axis_tvalid, + input wire [ NUM_DATA_I-1:0] m_axis_tready, + // Sideband information + output wire [ (NUM_DATA_I*64)-1:0] m_axis_ttimestamp, + output wire [ NUM_DATA_I-1:0] m_axis_thas_time, + output wire [ NUM_DATA_I-1:0] m_axis_teov, + output wire [ NUM_DATA_I-1:0] m_axis_teob, + + // Input data stream (from user logic) + input wire [(NUM_DATA_O*ITEM_W*NIPC)-1:0] s_axis_tdata, + input wire [ 
(NUM_DATA_O*NIPC)-1:0] s_axis_tkeep, + input wire [ NUM_DATA_O-1:0] s_axis_tlast, + input wire [ NUM_DATA_O-1:0] s_axis_tvalid, + output wire [ NUM_DATA_O-1:0] s_axis_tready, + // Sideband info (sampled on the first cycle of the packet) + input wire [ (NUM_DATA_O*64)-1:0] s_axis_ttimestamp, + input wire [ NUM_DATA_O-1:0] s_axis_thas_time, + input wire [ NUM_DATA_O-1:0] s_axis_teov, + input wire [ NUM_DATA_O-1:0] s_axis_teob +); + + localparam SNK_INFO_FIFO_SIZE = 4; + localparam SNK_PYLD_FIFO_SIZE = PYLD_FIFO_SIZE; + localparam SRC_INFO_FIFO_SIZE = 4; + localparam SRC_PYLD_FIFO_SIZE = MTU; + + //--------------------------------------------------------------------------- + // Backend Interface + //--------------------------------------------------------------------------- + + wire data_i_flush_en; + wire [31:0] data_i_flush_timeout; + wire [63:0] data_i_flush_active; + wire [63:0] data_i_flush_done; + wire data_o_flush_en; + wire [31:0] data_o_flush_timeout; + wire [63:0] data_o_flush_active; + wire [63:0] data_o_flush_done; + + backend_iface #( + .NOC_ID (NOC_ID), + .NUM_DATA_I (NUM_DATA_I), + .NUM_DATA_O (NUM_DATA_O), + .CTRL_FIFOSIZE (CTRL_FIFO_SIZE), + .MTU (MTU) + ) backend_iface_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst), + .data_i_flush_en (data_i_flush_en), + .data_i_flush_timeout (data_i_flush_timeout), + .data_i_flush_active (data_i_flush_active), + .data_i_flush_done (data_i_flush_done), + .data_o_flush_en (data_o_flush_en), + .data_o_flush_timeout (data_o_flush_timeout), + .data_o_flush_active (data_o_flush_active), + .data_o_flush_done (data_o_flush_done) + ); + + //--------------------------------------------------------------------------- + // Control Path + //--------------------------------------------------------------------------- + + ctrlport_endpoint #( + 
.THIS_PORTID (THIS_PORTID ), + .SYNC_CLKS (0 ), + .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN), + .AXIS_CTRL_SLV_EN (CTRLPORT_MST_EN), + .SLAVE_FIFO_SIZE (CTRL_FIFO_SIZE ) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready ), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready ), + .m_ctrlport_req_wr (m_ctrlport_req_wr ), + .m_ctrlport_req_rd (m_ctrlport_req_rd ), + .m_ctrlport_req_addr (m_ctrlport_req_addr ), + .m_ctrlport_req_data (m_ctrlport_req_data ), + .m_ctrlport_req_byte_en (m_ctrlport_req_byte_en ), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time ), + .m_ctrlport_req_time (m_ctrlport_req_time ), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack ), + .m_ctrlport_resp_status (m_ctrlport_resp_status ), + .m_ctrlport_resp_data (m_ctrlport_resp_data ), + .s_ctrlport_req_wr (s_ctrlport_req_wr ), + .s_ctrlport_req_rd (s_ctrlport_req_rd ), + .s_ctrlport_req_addr (s_ctrlport_req_addr ), + .s_ctrlport_req_portid (s_ctrlport_req_portid ), + .s_ctrlport_req_rem_epid (s_ctrlport_req_rem_epid ), + .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .s_ctrlport_req_data (s_ctrlport_req_data ), + .s_ctrlport_req_byte_en (s_ctrlport_req_byte_en ), + .s_ctrlport_req_has_time (s_ctrlport_req_has_time ), + .s_ctrlport_req_time (s_ctrlport_req_time ), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack ), + .s_ctrlport_resp_status (s_ctrlport_resp_status ), + .s_ctrlport_resp_data (s_ctrlport_resp_data ) + ); + + //--------------------------------------------------------------------------- + // Data Path + 
//--------------------------------------------------------------------------- + + genvar i; + generate + + for (i = 0; i < NUM_DATA_I; i = i + 1) begin: chdr_to_data + chdr_to_axis_data #( + .CHDR_W (CHDR_W), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .SYNC_CLKS (0), + .INFO_FIFO_SIZE (SNK_INFO_FIFO_SIZE), + .PYLD_FIFO_SIZE (SNK_PYLD_FIFO_SIZE) + ) chdr_to_axis_data_i ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (axis_data_clk), + .axis_data_rst (axis_data_rst), + .s_axis_chdr_tdata (s_rfnoc_chdr_tdata [(i*CHDR_W)+:CHDR_W]), + .s_axis_chdr_tlast (s_rfnoc_chdr_tlast [i]), + .s_axis_chdr_tvalid (s_rfnoc_chdr_tvalid [i]), + .s_axis_chdr_tready (s_rfnoc_chdr_tready [i]), + .m_axis_tdata (m_axis_tdata [i*ITEM_W*NIPC +: ITEM_W*NIPC]), + .m_axis_tkeep (m_axis_tkeep [i*NIPC +: NIPC]), + .m_axis_tlast (m_axis_tlast [i]), + .m_axis_tvalid (m_axis_tvalid [i]), + .m_axis_tready (m_axis_tready [i]), + .m_axis_ttimestamp (m_axis_ttimestamp [i*64 +: 64]), + .m_axis_thas_time (m_axis_thas_time [i]), + .m_axis_tlength (), + .m_axis_teov (m_axis_teov [i]), + .m_axis_teob (m_axis_teob [i]), + .flush_en (data_i_flush_en), + .flush_timeout (data_i_flush_timeout), + .flush_active (data_i_flush_active [i]), + .flush_done (data_i_flush_done [i]) + ); + end + + for (i = 0; i < NUM_DATA_O; i = i + 1) begin: data_to_chdr + axis_data_to_chdr #( + .CHDR_W (CHDR_W), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .SYNC_CLKS (0), + .INFO_FIFO_SIZE (4), + .PYLD_FIFO_SIZE (SRC_INFO_FIFO_SIZE), + .MTU (SRC_PYLD_FIFO_SIZE) + ) axis_data_to_chdr_i ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (axis_data_clk), + .axis_data_rst (axis_data_rst), + .m_axis_chdr_tdata (m_rfnoc_chdr_tdata [i*CHDR_W +: CHDR_W]), + .m_axis_chdr_tlast (m_rfnoc_chdr_tlast [i]), + .m_axis_chdr_tvalid (m_rfnoc_chdr_tvalid [i]), + .m_axis_chdr_tready (m_rfnoc_chdr_tready [i]), + .s_axis_tdata (s_axis_tdata [i*ITEM_W*NIPC +: ITEM_W*NIPC]), + .s_axis_tkeep 
(s_axis_tkeep [i*NIPC +: NIPC]), + .s_axis_tlast (s_axis_tlast [i]), + .s_axis_tvalid (s_axis_tvalid [i]), + .s_axis_tready (s_axis_tready [i]), + .s_axis_ttimestamp (s_axis_ttimestamp [i*64 +: 64]), + .s_axis_thas_time (s_axis_thas_time [i]), + .s_axis_teov (s_axis_teov [i]), + .s_axis_teob (s_axis_teob [i]), + .flush_en (data_o_flush_en), + .flush_timeout (data_o_flush_timeout), + .flush_active (data_o_flush_active [i]), + .flush_done (data_o_flush_done [i]) + ); + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/quarter_rate_downconverter.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/quarter_rate_downconverter.v new file mode 100644 index 000000000..ded9a8c0b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/quarter_rate_downconverter.v @@ -0,0 +1,138 @@ +// +// Copyright 2018 Ettus Research, a National Instruments Brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// mixer with 90 degree angles, i.e., multiplying the input signal with 1, i, -1, -i: + +// Let S(t) = I(t) + i*Q(t) be the input signal based on inputs i_in and q_in +// Multiplying with (1,i,-1,-i) then becomes: +// S(t) * 1 = I(t) + i*Q(t) +// S(t) * i = -Q(t) + i*I(t) +// S(t) * -1 = -I(t) - i*Q(t) +// S(t) * -i = Q(t) - i*I(t) + +// To control the direction of rotation, the dirctn input is used +// When set to 0, the phase is increased with pi/2 every sample, i.e., rotating counter clock wise +// When set to 1, the phase is increased with -pi/2 every sample, i.e., rotating clock wise + +// the input is the concatenation of the i and q signal: {i_in, q_in} + +module quarter_rate_downconverter #( + parameter WIDTH=24 +)( + input clk, + input reset, + input phase_sync, + + input [2*WIDTH-1:0] i_tdata, + input i_tlast, + input i_tvalid, + output i_tready, + + output [2*WIDTH-1:0] o_tdata, + output o_tlast, + output o_tvalid, + input o_tready, + + input dirctn +); + + // temporary signals for i and q after rotation + reg 
[WIDTH-1:0] tmp_i = {WIDTH{1'b0}};
  reg [WIDTH-1:0] tmp_q = {WIDTH{1'b0}};

  // Rotation phase, encoded as the number of quarter turns applied to the
  // input sample: S0 = x1, S1 = xi, S2 = x(-1), S3 = x(-i).
  localparam S0=0, S1=1, S2=2, S3=3;
  reg [1:0] cur_state;

  // Unpack the input word: I is the upper half, Q is the lower half.
  wire [WIDTH-1:0] i_in = i_tdata[2*WIDTH-1:WIDTH];
  wire [WIDTH-1:0] q_in = i_tdata[WIDTH-1:0];

  // High on cycles where an input sample is accepted.
  wire sample_xfer = (i_tvalid == 1'b1 && i_tready == 1'b1);

  // Phase counter. Because S0..S3 are encoded 0..3, stepping through the
  // states is a 2-bit wrapping increment (dirctn = 0) or decrement
  // (dirctn = 1), taken once per accepted sample. reset and phase_sync both
  // force the phase back to S0.
  always @(posedge clk) begin
    if (reset || phase_sync) begin
      cur_state <= S0;
    end else if (sample_xfer) begin
      if (dirctn == 1'b0)
        cur_state <= cur_state + 2'd1;
      else
        cur_state <= cur_state - 2'd1;
    end
  end

  // Negated inputs, truncated to WIDTH bits exactly as the direct
  // assignments of -i_in / -q_in to the WIDTH-bit outputs would be.
  wire [WIDTH-1:0] neg_i = -i_in;
  wire [WIDTH-1:0] neg_q = -q_in;

  // Multiply the input sample by 1, i, -1 or -i according to the phase.
  always @(*) begin
    // Pass-through (S(t) * 1) is the default; it covers S0 and any
    // unrecognised state value.
    tmp_i = i_in;
    tmp_q = q_in;
    case (cur_state)
      S1: begin
        // S(t) * i = -Q(t) + iI(t)
        tmp_i = neg_q;
        tmp_q = i_in;
      end
      S2: begin
        // S(t) * -1 = -I(t) - iQ(t)
        tmp_i = neg_i;
        tmp_q = neg_q;
      end
      S3: begin
        // S(t) * -i = Q(t) - iI(t)
        tmp_i = q_in;
        tmp_q = neg_i;
      end
      default: begin
        // keep the pass-through values assigned above
      end
    endcase
  end

  // Flop for valid and ready signals and shortening of comb. paths.
+ axi_fifo #(.WIDTH(2*WIDTH + 1), .SIZE(1)) flop ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({i_tlast, tmp_i, tmp_q}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready), + .occupied(), .space()); + +endmodule // quarter_rate_downconverter diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_core.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_core.v new file mode 100644 index 000000000..9456fc398 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_core.v @@ -0,0 +1,370 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: radio_core +// +// Description: +// +// A radio core for RFNoC. This core contains all logic in the radio clock +// domain for interfacing to a single RX/TX radio. It includes registers shared +// by both Rx and Tx logic and instantiates Rx and Tx interface cores. +// +// Parameters: +// +// BASE_ADDR : Base address for this radio block instance +// SAMP_W : Width of a radio sample +// NSPC : Number of radio samples per radio clock cycle +// + + +module radio_core #( + parameter SAMP_W = 32, + parameter NSPC = 1 +) ( + input wire radio_clk, + input wire radio_rst, + + + //--------------------------------------------------------------------------- + // Control Interface + //--------------------------------------------------------------------------- + + // Slave + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [31:0] s_ctrlport_req_data, + output wire s_ctrlport_resp_ack, + output wire [31:0] s_ctrlport_resp_data, + + // Master + output wire m_ctrlport_req_wr, + output wire [19:0] m_ctrlport_req_addr, + output wire [ 9:0] m_ctrlport_req_portid, + output wire [15:0] m_ctrlport_req_rem_epid, + output wire [ 9:0] m_ctrlport_req_rem_portid, + output wire [31:0] m_ctrlport_req_data, + 
output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + + + //--------------------------------------------------------------------------- + // Data Interface + //--------------------------------------------------------------------------- + + // Tx Radio Data Stream + input wire [(SAMP_W*NSPC)-1:0] s_axis_tdata, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + // Sideband info + input wire [ 63:0] s_axis_ttimestamp, + input wire s_axis_thas_time, + input wire s_axis_teob, + + // Rx Radio Data Stream + output wire [(SAMP_W*NSPC)-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready, + // Sideband info + output wire [ 63:0] m_axis_ttimestamp, + output wire m_axis_thas_time, + output wire m_axis_teob, + + + //--------------------------------------------------------------------------- + // Radio Interface + //--------------------------------------------------------------------------- + + input wire [63:0] radio_time, + + // Radio Rx Interface + input wire [SAMP_W*NSPC-1:0] radio_rx_data, + input wire radio_rx_stb, + output wire radio_rx_running, + + // Radio Tx Interface + output wire [SAMP_W*NSPC-1:0] radio_tx_data, + input wire radio_tx_stb, + output wire radio_tx_running +); + + `include "rfnoc_block_radio_regs.vh" + + + //--------------------------------------------------------------------------- + // Split Control Port Interface + //--------------------------------------------------------------------------- + // + // This block splits the single slave interface of the radio core into + // multiple interfaces, one for each subcomponent. The responses from each + // subcomponent are merged into a single response and sent back out the slave + // interface. 
+ // + //--------------------------------------------------------------------------- + + // Registers shared by Rx and Tx + wire ctrlport_general_req_wr; + wire ctrlport_general_req_rd; + wire [19:0] ctrlport_general_req_addr; + wire [31:0] ctrlport_general_req_data; + reg ctrlport_general_resp_ack = 1'b0; + reg [31:0] ctrlport_general_resp_data = 0; + + // Tx core registers + wire ctrlport_tx_req_wr; + wire ctrlport_tx_req_rd; + wire [19:0] ctrlport_tx_req_addr; + wire [31:0] ctrlport_tx_req_data; + wire ctrlport_tx_resp_ack; + wire [31:0] ctrlport_tx_resp_data; + + // Rx core registers + wire ctrlport_rx_req_wr; + wire ctrlport_rx_req_rd; + wire [19:0] ctrlport_rx_req_addr; + wire [31:0] ctrlport_rx_req_data; + wire ctrlport_rx_resp_ack; + wire [31:0] ctrlport_rx_resp_data; + + ctrlport_splitter #( + .NUM_SLAVES (3) + ) ctrlport_decoder_i ( + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .s_ctrlport_req_wr (s_ctrlport_req_wr), + .s_ctrlport_req_rd (s_ctrlport_req_rd), + .s_ctrlport_req_addr (s_ctrlport_req_addr), + .s_ctrlport_req_data (s_ctrlport_req_data), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (s_ctrlport_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (s_ctrlport_resp_data), + .m_ctrlport_req_wr ({ctrlport_general_req_wr, + ctrlport_tx_req_wr, + ctrlport_rx_req_wr}), + .m_ctrlport_req_rd ({ctrlport_general_req_rd, + ctrlport_tx_req_rd, + ctrlport_rx_req_rd}), + .m_ctrlport_req_addr ({ctrlport_general_req_addr, + ctrlport_tx_req_addr, + ctrlport_rx_req_addr}), + .m_ctrlport_req_data ({ctrlport_general_req_data, + ctrlport_tx_req_data, + ctrlport_rx_req_data}), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack ({ctrlport_general_resp_ack, + ctrlport_tx_resp_ack, + ctrlport_rx_resp_ack}), + .m_ctrlport_resp_status (6'b0), + .m_ctrlport_resp_data ({ctrlport_general_resp_data, + 
ctrlport_tx_resp_data, + ctrlport_rx_resp_data}) + ); + + + //--------------------------------------------------------------------------- + // Merge Control Port Interfaces + //--------------------------------------------------------------------------- + // + // This block merges the master control port interfaces of the Rx and Tx + // cores into a single master control port interface. Both the Rx and Tx + // cores support error reporting by writing to a control port interface. This + // block arbitrates the requests between the Rx and Tx cores. Rx and Tx only + // support writes for error reporting, not reads. Time and byte enables are + // also not needed. Hence, several ports are unconnected. + // + //--------------------------------------------------------------------------- + + // Tx and Rx error reporting signals + wire ctrlport_err_tx_req_wr, ctrlport_err_rx_req_wr; + wire [19:0] ctrlport_err_tx_req_addr, ctrlport_err_rx_req_addr; + wire [31:0] ctrlport_err_tx_req_data, ctrlport_err_rx_req_data; + wire ctrlport_err_tx_req_has_time, ctrlport_err_rx_req_has_time; + wire [63:0] ctrlport_err_tx_req_time, ctrlport_err_rx_req_time; + wire [ 9:0] ctrlport_err_tx_req_portid, ctrlport_err_rx_req_portid; + wire [15:0] ctrlport_err_tx_req_rem_epid, ctrlport_err_rx_req_rem_epid; + wire [ 9:0] ctrlport_err_tx_req_rem_portid, ctrlport_err_rx_req_rem_portid; + wire ctrlport_err_tx_resp_ack, ctrlport_err_rx_resp_ack; + + + ctrlport_combiner #( + .NUM_MASTERS (2), + .PRIORITY (0) + ) ctrlport_req_combine_i ( + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .s_ctrlport_req_wr ({ctrlport_err_tx_req_wr, ctrlport_err_rx_req_wr}), + .s_ctrlport_req_rd (2'b0), + .s_ctrlport_req_addr ({ctrlport_err_tx_req_addr, ctrlport_err_rx_req_addr}), + .s_ctrlport_req_portid ({ctrlport_err_tx_req_portid, ctrlport_err_rx_req_portid}), + .s_ctrlport_req_rem_epid ({ctrlport_err_tx_req_rem_epid, ctrlport_err_rx_req_rem_epid}), + .s_ctrlport_req_rem_portid 
({ctrlport_err_tx_req_rem_portid, ctrlport_err_rx_req_rem_portid}), + .s_ctrlport_req_data ({ctrlport_err_tx_req_data, ctrlport_err_rx_req_data}), + .s_ctrlport_req_byte_en (8'hFF), + .s_ctrlport_req_has_time ({ctrlport_err_tx_req_has_time, ctrlport_err_rx_req_has_time}), + .s_ctrlport_req_time ({ctrlport_err_tx_req_time, ctrlport_err_rx_req_time}), + .s_ctrlport_resp_ack ({ctrlport_err_tx_resp_ack, ctrlport_err_rx_resp_ack}), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .m_ctrlport_req_wr (m_ctrlport_req_wr), + .m_ctrlport_req_rd (), + .m_ctrlport_req_addr (m_ctrlport_req_addr), + .m_ctrlport_req_portid (m_ctrlport_req_portid), + .m_ctrlport_req_rem_epid (m_ctrlport_req_rem_epid), + .m_ctrlport_req_rem_portid (m_ctrlport_req_rem_portid), + .m_ctrlport_req_data (m_ctrlport_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (m_ctrlport_req_has_time), + .m_ctrlport_req_time (m_ctrlport_req_time), + .m_ctrlport_resp_ack (m_ctrlport_resp_ack), + .m_ctrlport_resp_status (2'b0), + .m_ctrlport_resp_data (0) + ); + + + //--------------------------------------------------------------------------- + // General Registers + //--------------------------------------------------------------------------- + // + // These are registers that apply to both Rx and Tx and are shared by both. 
+ // + //--------------------------------------------------------------------------- + + reg reg_loopback_en = 1'b0; + + always @(posedge radio_clk) begin + if (radio_rst) begin + ctrlport_general_resp_ack <= 0; + ctrlport_general_resp_data <= 0; + reg_loopback_en <= 0; + end else begin + // Default assignments + ctrlport_general_resp_ack <= 0; + ctrlport_general_resp_data <= 0; + + // Handle register writes + if (ctrlport_general_req_wr) begin + case (ctrlport_general_req_addr) + REG_LOOPBACK_EN: begin + reg_loopback_en <= ctrlport_general_req_data[0]; + ctrlport_general_resp_ack <= 1; + end + endcase + end + + // Handle register reads + if (ctrlport_general_req_rd) begin + case (ctrlport_general_req_addr) + REG_LOOPBACK_EN: begin + ctrlport_general_resp_data <= 0; + ctrlport_general_resp_data[0] <= reg_loopback_en; + ctrlport_general_resp_ack <= 1; + end + REG_RADIO_WIDTH: begin + ctrlport_general_resp_data <= { SAMP_W[15:0], NSPC[15:0] }; + ctrlport_general_resp_ack <= 1; + end + endcase + end + end + end + + + //--------------------------------------------------------------------------- + // Tx to Rx Loopback + //--------------------------------------------------------------------------- + + wire [SAMP_W*NSPC-1:0] radio_rx_data_mux; + wire radio_rx_stb_mux; + + assign radio_rx_data_mux = reg_loopback_en ? radio_tx_data : radio_rx_data; + assign radio_rx_stb_mux = reg_loopback_en ? 
radio_tx_stb : radio_rx_stb; + + + //--------------------------------------------------------------------------- + // Tx Core + //--------------------------------------------------------------------------- + + radio_tx_core #( + .SAMP_W (SAMP_W), + .NSPC (NSPC) + ) radio_tx_core_i ( + .radio_clk (radio_clk), + .radio_rst (radio_rst), + .s_ctrlport_req_wr (ctrlport_tx_req_wr), + .s_ctrlport_req_rd (ctrlport_tx_req_rd), + .s_ctrlport_req_addr (ctrlport_tx_req_addr), + .s_ctrlport_req_data (ctrlport_tx_req_data), + .s_ctrlport_resp_ack (ctrlport_tx_resp_ack), + .s_ctrlport_resp_data (ctrlport_tx_resp_data), + .m_ctrlport_req_wr (ctrlport_err_tx_req_wr), + .m_ctrlport_req_addr (ctrlport_err_tx_req_addr), + .m_ctrlport_req_data (ctrlport_err_tx_req_data), + .m_ctrlport_req_has_time (ctrlport_err_tx_req_has_time), + .m_ctrlport_req_time (ctrlport_err_tx_req_time), + .m_ctrlport_req_portid (ctrlport_err_tx_req_portid), + .m_ctrlport_req_rem_epid (ctrlport_err_tx_req_rem_epid), + .m_ctrlport_req_rem_portid (ctrlport_err_tx_req_rem_portid), + .m_ctrlport_resp_ack (ctrlport_err_tx_resp_ack), + .radio_time (radio_time), + .radio_tx_data (radio_tx_data), + .radio_tx_stb (radio_tx_stb), + .radio_tx_running (radio_tx_running), + .s_axis_tdata (s_axis_tdata), + .s_axis_tlast (s_axis_tlast), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + .s_axis_ttimestamp (s_axis_ttimestamp), + .s_axis_thas_time (s_axis_thas_time), + .s_axis_teob (s_axis_teob) + ); + + + //--------------------------------------------------------------------------- + // Rx Core + //--------------------------------------------------------------------------- + + radio_rx_core #( + .SAMP_W (SAMP_W), + .NSPC (NSPC) + ) radio_rx_core_i ( + .radio_clk (radio_clk), + .radio_rst (radio_rst), + .s_ctrlport_req_wr (ctrlport_rx_req_wr), + .s_ctrlport_req_rd (ctrlport_rx_req_rd), + .s_ctrlport_req_addr (ctrlport_rx_req_addr), + .s_ctrlport_req_data (ctrlport_rx_req_data), + .s_ctrlport_resp_ack 
(ctrlport_rx_resp_ack), + .s_ctrlport_resp_data (ctrlport_rx_resp_data), + .m_ctrlport_req_wr (ctrlport_err_rx_req_wr), + .m_ctrlport_req_addr (ctrlport_err_rx_req_addr), + .m_ctrlport_req_data (ctrlport_err_rx_req_data), + .m_ctrlport_req_has_time (ctrlport_err_rx_req_has_time), + .m_ctrlport_req_time (ctrlport_err_rx_req_time), + .m_ctrlport_req_portid (ctrlport_err_rx_req_portid), + .m_ctrlport_req_rem_epid (ctrlport_err_rx_req_rem_epid), + .m_ctrlport_req_rem_portid (ctrlport_err_rx_req_rem_portid), + .m_ctrlport_resp_ack (ctrlport_err_rx_resp_ack), + .radio_time (radio_time), + .radio_rx_data (radio_rx_data_mux), + .radio_rx_stb (radio_rx_stb_mux), + .radio_rx_running (radio_rx_running), + .m_axis_tdata (m_axis_tdata), + .m_axis_tlast (m_axis_tlast), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + .m_axis_ttimestamp (m_axis_ttimestamp), + .m_axis_thas_time (m_axis_thas_time), + .m_axis_teob (m_axis_teob) + ); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_rx_core.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_rx_core.v new file mode 100644 index 000000000..ee7774fd7 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_rx_core.v @@ -0,0 +1,521 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: radio_rx_core +// +// Description: +// +// This module contains the core Rx radio acquisition logic. It retrieves +// sample data from the radio interface, as indicated by the radio's strobe +// signal, and outputs the data via AXI-Stream. +// +// The receiver is operated by writing a time (optionally) to the +// REG_RX_CMD_TIME_* registers and a number of words (optionally) to +// REG_RX_CMD_NUM_WORDS_* registers followed by writing a command word to +// REG_RX_CMD. 
The command word indicates whether it is a finite ("num samps +// and done") or continuous acquisition and whether or not the acquisition +// should start at the time indicated by REG_RX_CMD_TIME_*. A stop command will +// stop any acquisition that's waiting to start or is in progress. +// +// The REG_RX_MAX_WORDS_PER_PKT and REG_RX_ERR_* registers should be +// initialized prior to the first acquisition. +// +// Parameters: +// +// SAMP_W : Width of a radio sample +// NSPC : Number of radio samples per radio clock cycle +// +`default_nettype none + + +module radio_rx_core #( + parameter SAMP_W = 32, + parameter NSPC = 1 +) ( + input wire radio_clk, + input wire radio_rst, + + + //--------------------------------------------------------------------------- + // Control Interface + //--------------------------------------------------------------------------- + + // Slave (Register Reads and Writes) + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [31:0] s_ctrlport_req_data, + output reg s_ctrlport_resp_ack = 1'b0, + output reg [31:0] s_ctrlport_resp_data, + + // Master (Error Reporting) + output reg m_ctrlport_req_wr = 1'b0, + output reg [19:0] m_ctrlport_req_addr, + output reg [31:0] m_ctrlport_req_data, + output wire m_ctrlport_req_has_time, + output reg [63:0] m_ctrlport_req_time, + output wire [ 9:0] m_ctrlport_req_portid, + output wire [15:0] m_ctrlport_req_rem_epid, + output wire [ 9:0] m_ctrlport_req_rem_portid, + input wire m_ctrlport_resp_ack, + + + //--------------------------------------------------------------------------- + // Radio Interface + //--------------------------------------------------------------------------- + + input wire [63:0] radio_time, + + input wire [SAMP_W*NSPC-1:0] radio_rx_data, + input wire radio_rx_stb, + + // Status indicator (true when receiving) + output wire radio_rx_running, + + + //--------------------------------------------------------------------------- 
+ // AXI-Stream Data Output + //--------------------------------------------------------------------------- + + output wire [SAMP_W*NSPC-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready, + // Sideband info + output wire [ 63:0] m_axis_ttimestamp, + output wire m_axis_thas_time, + output wire m_axis_teob +); + + `include "rfnoc_block_radio_regs.vh" + `include "../../core/rfnoc_chdr_utils.vh" + + localparam NUM_WORDS_LEN = RX_CMD_NUM_WORDS_LEN; + + + //--------------------------------------------------------------------------- + // Register Read/Write Logic + //--------------------------------------------------------------------------- + + reg reg_cmd_valid = 0; // Indicates when the CMD_FIFO has been written + reg [ RX_CMD_LEN-1:0] reg_cmd_word = 0; // Command to execute + reg [NUM_WORDS_LEN-1:0] reg_cmd_num_words = 0; // Number of words for the command + reg [ 63:0] reg_cmd_time = 0; // Time for the command + reg reg_cmd_timed = 0; // Indicates if this is a timed command + reg [ 31:0] reg_max_pkt_len = 64; // Maximum words per packet + reg [ 9:0] reg_error_portid = 0; // Port ID to use for error reporting + reg [ 15:0] reg_error_rem_epid = 0; // Remote EPID to use for error reporting + reg [ 9:0] reg_error_rem_portid = 0; // Remote port ID to use for error reporting + reg [ 19:0] reg_error_addr = 0; // Address to use for error reporting + reg reg_has_time = 1; // Whether or not to use timestamps on data + + wire [15:0] cmd_fifo_space; // Empty space in the command FIFO + reg cmd_stop = 0; // Indicates a full stop request + wire cmd_stop_ack; // Acknowledgment that a stop has completed + reg clear_fifo = 0; // Signal to clear the command FIFO + + assign m_axis_thas_time = reg_has_time; + + always @(posedge radio_clk) begin + if (radio_rst) begin + s_ctrlport_resp_ack <= 0; + reg_cmd_valid <= 0; + reg_cmd_word <= 0; + reg_cmd_num_words <= 0; + reg_cmd_time <= 0; + reg_cmd_timed <= 0; + reg_max_pkt_len <= 64; + 
reg_error_portid <= 0; + reg_error_rem_epid <= 0; + reg_error_rem_portid <= 0; + reg_error_addr <= 0; + reg_has_time <= 1; + clear_fifo <= 0; + cmd_stop <= 0; + end else begin + // Default assignments + s_ctrlport_resp_ack <= 0; + s_ctrlport_resp_data <= 0; + reg_cmd_valid <= 0; + clear_fifo <= 0; + + // Clear stop register when we enter the STOP state + if (cmd_stop_ack) cmd_stop <= 1'b0; + + // Handle register writes + if (s_ctrlport_req_wr) begin + case (s_ctrlport_req_addr) + REG_RX_CMD: begin + // All commands go into the command FIFO except STOP + reg_cmd_valid <= (s_ctrlport_req_data[RX_CMD_LEN-1:0] != RX_CMD_STOP); + reg_cmd_word <= s_ctrlport_req_data[RX_CMD_LEN-1:0]; + reg_cmd_timed <= s_ctrlport_req_data[RX_CMD_TIMED_POS]; + s_ctrlport_resp_ack <= 1; + + // cmd_stop must remain asserted until it has completed + if (!cmd_stop || cmd_stop_ack) begin + cmd_stop <= (s_ctrlport_req_data[RX_CMD_LEN-1:0] == RX_CMD_STOP); + end + // A STOP command also flushes any commands still queued in the command FIFO + clear_fifo <= (s_ctrlport_req_data[RX_CMD_LEN-1:0] == RX_CMD_STOP); + end + REG_RX_CMD_NUM_WORDS_LO: begin + reg_cmd_num_words[31:0] <= s_ctrlport_req_data; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_NUM_WORDS_HI: begin + reg_cmd_num_words[NUM_WORDS_LEN-1:32] <= s_ctrlport_req_data[NUM_WORDS_LEN-32-1:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_TIME_LO: begin + reg_cmd_time[31:0] <= s_ctrlport_req_data; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_TIME_HI: begin + reg_cmd_time[63:32] <= s_ctrlport_req_data; + s_ctrlport_resp_ack <= 1; + end + REG_RX_MAX_WORDS_PER_PKT: begin + reg_max_pkt_len <= s_ctrlport_req_data; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_PORT: begin + reg_error_portid <= s_ctrlport_req_data[9:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_REM_PORT: begin + reg_error_rem_portid <= s_ctrlport_req_data[9:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_REM_EPID: begin + reg_error_rem_epid <= s_ctrlport_req_data[15:0]; + 
s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_ADDR: begin + reg_error_addr <= 
s_ctrlport_req_data[19:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_HAS_TIME: begin + reg_has_time <= s_ctrlport_req_data[0:0]; + s_ctrlport_resp_ack <= 1; + end + endcase + end + + // Handle register reads + if (s_ctrlport_req_rd) begin + case (s_ctrlport_req_addr) + REG_RX_STATUS: begin + s_ctrlport_resp_data[CMD_FIFO_SPACE_POS+:CMD_FIFO_SPACE_LEN] + <= cmd_fifo_space[CMD_FIFO_SPACE_LEN-1:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD: begin + s_ctrlport_resp_data[RX_CMD_LEN-1:0] <= reg_cmd_word; + s_ctrlport_resp_data[RX_CMD_TIMED_POS] <= reg_cmd_timed; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_NUM_WORDS_LO: begin + s_ctrlport_resp_data <= reg_cmd_num_words[31:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_NUM_WORDS_HI: begin + s_ctrlport_resp_data[NUM_WORDS_LEN-32-1:0] <= reg_cmd_num_words[NUM_WORDS_LEN-1:32]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_TIME_LO: begin + s_ctrlport_resp_data <= reg_cmd_time[31:0]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_CMD_TIME_HI: begin + s_ctrlport_resp_data <= reg_cmd_time[63:32]; + s_ctrlport_resp_ack <= 1; + end + REG_RX_MAX_WORDS_PER_PKT: begin + s_ctrlport_resp_data <= reg_max_pkt_len; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_PORT: begin + s_ctrlport_resp_data[9:0] <= reg_error_portid; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_REM_PORT: begin + s_ctrlport_resp_data[9:0] <= reg_error_rem_portid; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_REM_EPID: begin + s_ctrlport_resp_data[15:0] <= reg_error_rem_epid; + s_ctrlport_resp_ack <= 1; + end + REG_RX_ERR_ADDR: begin + s_ctrlport_resp_data[19:0] <= reg_error_addr; + s_ctrlport_resp_ack <= 1; + end + REG_RX_DATA: begin + // Reads back the current radio_rx_data input; any bits above the 32-bit response width are truncated + s_ctrlport_resp_data <= radio_rx_data; + s_ctrlport_resp_ack <= 1; + end + REG_RX_HAS_TIME: begin + s_ctrlport_resp_data[0] <= reg_has_time; + s_ctrlport_resp_ack <= 1; + end + endcase + end + + end + end + + + 
//--------------------------------------------------------------------------- + // Command Queue + 
//--------------------------------------------------------------------------- + + wire [ 63:0] cmd_time; // Time for next start of command + wire cmd_timed; // Command is timed (use cmd_time) + wire [NUM_WORDS_LEN-1:0] cmd_num_words; // Number of words for next command + wire cmd_continuous; // Command is continuous (ignore cmd_num_words) + wire cmd_valid; // cmd_* is a valid command + wire cmd_done; // Command has completed and can be popped from FIFO + + axi_fifo #( + .WIDTH (64 + 1 + NUM_WORDS_LEN + 1), + .SIZE (5) // Ideally, this size will lead to an SRL-based FIFO + ) cmd_fifo ( + .clk (radio_clk), + .reset (radio_rst), + .clear (clear_fifo), + .i_tdata ({ reg_cmd_time, reg_cmd_timed, reg_cmd_num_words, (reg_cmd_word == RX_CMD_CONTINUOUS) }), + .i_tvalid (reg_cmd_valid), + // NOTE(review): i_tready is left unconnected, so register writes are not back-pressured here; presumably software checks the FIFO space in REG_RX_STATUS first -- confirm + .i_tready (), + .o_tdata ({ cmd_time, cmd_timed, cmd_num_words, cmd_continuous }), + .o_tvalid (cmd_valid), + .o_tready (cmd_done), + .space (cmd_fifo_space), + .occupied () + ); + + + //--------------------------------------------------------------------------- + // Receiver State Machine + //--------------------------------------------------------------------------- + + // FSM state values + localparam ST_IDLE = 0; + localparam ST_TIME_CHECK = 1; + localparam ST_RUNNING = 2; + localparam ST_STOP = 3; + localparam ST_REPORT_ERR = 4; + localparam ST_REPORT_ERR_WAIT = 5; + + reg [ 2:0] state = ST_IDLE; // Current state + reg [NUM_WORDS_LEN-1:0] words_left; // Words left in current command + reg [ 31:0] words_left_pkt; // Words left in current packet + reg first_word = 1'b1; // Next word is first in packet + reg [ 15:0] seq_num = 0; // Sequence number (packet count) + reg [ 63:0] error_time; // Time at which overflow occurred + reg [ERR_RX_CODE_W-1:0] error_code; // Error code register + + // Output FIFO signals + wire [ 15:0] 
out_fifo_space; + reg [SAMP_W*NSPC-1:0] out_fifo_tdata; + reg out_fifo_tlast; + reg out_fifo_tvalid = 1'b0; + reg [ 63:0] out_fifo_timestamp; + reg out_fifo_teob; + reg 
out_fifo_almost_full; + + reg [63:0] radio_time_low_samp, radio_time_hi_samp; + reg time_now, time_past; + + // All ctrlport requests have a time + assign m_ctrlport_req_has_time = 1'b1; + + // Acknowledge STOP requests and pop the command FIFO in the STOP state + assign cmd_stop_ack = (state == ST_STOP); + assign cmd_done = (state == ST_STOP); + + always @(posedge radio_clk) begin + if (radio_rst) begin + state <= ST_IDLE; + out_fifo_tvalid <= 1'b0; + seq_num <= 'd0; + m_ctrlport_req_wr <= 1'b0; + first_word <= 1'b1; + end else begin + // Default assignments + out_fifo_tvalid <= 1'b0; + out_fifo_tlast <= 1'b0; + out_fifo_teob <= 1'b0; + m_ctrlport_req_wr <= 1'b0; + + if (radio_rx_stb) begin + // Get the time for the low sample and the high sample of the radio + // word (needed when NSPC > 1). Compensate for the delay required to + // check the time by adding 3 clock cycles worth of samples. + radio_time_low_samp <= (radio_time + 3*NSPC); + radio_time_hi_samp <= (radio_time + 3*NSPC + (NSPC-1)); + + // Register the time comparisons so they don't become the critical path + time_now <= (cmd_time >= radio_time_low_samp && + cmd_time <= radio_time_hi_samp); + time_past <= (cmd_time < radio_time_low_samp); + end + + case (state) + ST_IDLE : begin + // Wait for a new command to arrive and allow a cycle for the time + // comparisons to update. 
+ if (cmd_valid && radio_rx_stb) begin + state <= ST_TIME_CHECK; + end else if (cmd_stop) begin + state <= ST_STOP; + end + first_word <= 1'b1; + end + + ST_TIME_CHECK : begin + if (cmd_stop) begin + // Nothing to do but stop (timed STOP commands are not supported) + state <= ST_STOP; + end else if (cmd_timed && time_past && radio_rx_stb) begin + // Got this command later than its execution time + //synthesis translate_off + $display("WARNING: radio_rx_core: Late command error"); + //synthesis translate_on + error_code <= ERR_RX_LATE_CMD; + error_time <= radio_time; + state <= ST_REPORT_ERR; + end else if (!cmd_timed || (time_now && radio_rx_stb)) begin + // Either it's time to run this command or it should run + // immediately. + words_left <= cmd_num_words; + words_left_pkt <= reg_max_pkt_len; + state <= ST_RUNNING; + end + end + + ST_RUNNING : begin + if (radio_rx_stb) begin + // Output the next word + out_fifo_tvalid <= 1'b1; + out_fifo_tdata <= radio_rx_data; + if (first_word) begin + out_fifo_timestamp <= radio_time; + first_word <= 1'b0; + end + + // Update word counters + words_left <= words_left - 1; + words_left_pkt <= words_left_pkt - 1; + + if ((words_left == 1 && !cmd_continuous) || cmd_stop) begin + // This command has finished, or we've been asked to stop. + state <= ST_STOP; + out_fifo_tlast <= 1'b1; + out_fifo_teob <= 1'b1; + first_word <= 1'b1; + end else if (words_left_pkt == 1) begin + // We've finished building a packet + seq_num <= seq_num + 1; + words_left_pkt <= reg_max_pkt_len; + out_fifo_tlast <= 1'b1; + first_word <= 1'b1; + end + + // Check for overflow. Note that we've left enough room in the + // output FIFO so that we can end the packet cleanly. 
+ if (out_fifo_almost_full) begin + // End the command and terminate packet early + // These nonblocking assignments come last in the block, so they override any state/flag values assigned above in the same cycle + //synthesis translate_off + $display("WARNING: radio_rx_core: Overrun error"); + //synthesis translate_on + out_fifo_tlast <= 1'b1; + out_fifo_teob <= 1'b1; + seq_num <= seq_num + 1; + error_time <= radio_time; + error_code <= ERR_RX_OVERRUN; + state <= ST_REPORT_ERR; + end + + end + end + + ST_STOP : begin + // This single-cycle state allows time for STOP to be acknowledged + // and for the command FIFO to be popped. + state <= ST_IDLE; + end + + ST_REPORT_ERR : begin + // Setup write of error code + m_ctrlport_req_wr <= 1'b1; + m_ctrlport_req_data <= 0; + m_ctrlport_req_data[ERR_RX_CODE_W-1:0] <= error_code; + m_ctrlport_req_addr <= reg_error_addr; + m_ctrlport_req_time <= error_time; + state <= ST_REPORT_ERR_WAIT; + end + + ST_REPORT_ERR_WAIT : begin + // Wait for write of error code and timestamp to complete + if (m_ctrlport_resp_ack) begin + state <= ST_STOP; + end + end + + default : state <= ST_IDLE; + endcase + end + end + + + assign radio_rx_running = (state == ST_RUNNING); // We're actively acquiring + + // Directly connect the port ID, remote port ID, and remote EPID since they + // are only used for error reporting. + assign m_ctrlport_req_portid = reg_error_portid; + assign m_ctrlport_req_rem_epid = reg_error_rem_epid; + assign m_ctrlport_req_rem_portid = reg_error_rem_portid; + + + //--------------------------------------------------------------------------- + // Output FIFO + //--------------------------------------------------------------------------- + // + // Here we buffer output samples and monitor FIFO fullness to be able to + // detect overflows. 
+ // + //--------------------------------------------------------------------------- + + axi_fifo #( + .WIDTH (1+64+1+SAMP_W*NSPC), + .SIZE (5) // Ideally, this size will lead to an SRL-based FIFO + ) output_fifo ( + .clk (radio_clk), + .reset (radio_rst), + .clear (1'b0), + .i_tdata ({out_fifo_teob, out_fifo_timestamp, out_fifo_tlast, out_fifo_tdata}), + .i_tvalid (out_fifo_tvalid), + .i_tready (), + .o_tdata ({m_axis_teob, m_axis_ttimestamp, m_axis_tlast, m_axis_tdata}), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready), + .space (out_fifo_space), + .occupied () + ); + + // Create a register to indicate if the output FIFO is about to overflow + always @(posedge radio_clk) begin + if (radio_rst) begin + out_fifo_almost_full <= 1'b0; + end else begin + out_fifo_almost_full <= (out_fifo_space < 5); + end + end + + +endmodule + + +`default_nettype wire diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_tx_core.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_tx_core.v new file mode 100644 index 000000000..d40db5122 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/radio_tx_core.v @@ -0,0 +1,417 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: radio_tx_core +// +// Description: +// +// This module contains the core Tx radio data-path logic. It receives samples +// over AXI-Stream that it then sends to the radio interface coincident with a +// strobe signal that must be provided by the radio interface. +// +// There are no registers for starting or stopping the transmitter. It is +// operated simply by providing data packets via its AXI-Stream data interface. +// The end-of-burst (EOB) signal is used to indicate when the transmitter is +// allowed to stop transmitting. Packet timestamps can be used to indicate when +// transmission should start. 
+// +// Care must be taken to provide data to the transmitter at a rate that is +// faster than the radio needs it so that underflows do not occur. Similarly, +// timed packets must be delivered before the timestamp expires. If a packet +// arrives late, then it will be dropped and the error will be reported via the +// CTRL port interface. +// +// Parameters: +// +// SAMP_W : Width of a radio sample +// NSPC : Number of radio samples per radio clock cycle +// + + +module radio_tx_core #( + parameter SAMP_W = 32, + parameter NSPC = 1 +) ( + input wire radio_clk, + input wire radio_rst, + + + //--------------------------------------------------------------------------- + // Control Interface + //--------------------------------------------------------------------------- + + // Slave (Register Reads and Writes) + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [31:0] s_ctrlport_req_data, + output reg s_ctrlport_resp_ack = 1'b0, + output reg [31:0] s_ctrlport_resp_data, + + // Master (Error Reporting) + output reg m_ctrlport_req_wr = 1'b0, + output reg [19:0] m_ctrlport_req_addr, + output reg [31:0] m_ctrlport_req_data, + output wire m_ctrlport_req_has_time, + output reg [63:0] m_ctrlport_req_time, + output wire [ 9:0] m_ctrlport_req_portid, + output wire [15:0] m_ctrlport_req_rem_epid, + output wire [ 9:0] m_ctrlport_req_rem_portid, + input wire m_ctrlport_resp_ack, + + + //--------------------------------------------------------------------------- + // Radio Interface + //--------------------------------------------------------------------------- + + input wire [63:0] radio_time, + + output wire [SAMP_W*NSPC-1:0] radio_tx_data, + input wire radio_tx_stb, + + // Status indicator (true when transmitting) + output wire radio_tx_running, + + + //--------------------------------------------------------------------------- + // AXI-Stream Data Input + 
//--------------------------------------------------------------------------- + + input wire [SAMP_W*NSPC-1:0] s_axis_tdata, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + // Sideband info + input wire [ 63:0] s_axis_ttimestamp, + input wire s_axis_thas_time, + input wire s_axis_teob +); + + `include "rfnoc_block_radio_regs.vh" + `include "../../core/rfnoc_chdr_utils.vh" + + + //--------------------------------------------------------------------------- + // Register Read/Write Logic + //--------------------------------------------------------------------------- + + reg [SAMP_W-1:0] reg_idle_value = 0; // Value to output when transmitter is idle + reg [ 9:0] reg_error_portid = 0; // Port ID to use for error reporting + reg [ 15:0] reg_error_rem_epid = 0; // Remote EPID to use for error reporting + reg [ 9:0] reg_error_rem_portid = 0; // Remote port ID to use for error reporting + reg [ 19:0] reg_error_addr = 0; // Address to use for error reporting + + reg [TX_ERR_POLICY_LEN-1:0] reg_policy = TX_ERR_POLICY_PACKET; + + always @(posedge radio_clk) begin + if (radio_rst) begin + s_ctrlport_resp_ack <= 0; + reg_idle_value <= 0; + reg_error_portid <= 0; + reg_error_rem_epid <= 0; + reg_error_rem_portid <= 0; + reg_error_addr <= 0; + reg_policy <= TX_ERR_POLICY_PACKET; + end else begin + // Default assignments + s_ctrlport_resp_ack <= 0; + s_ctrlport_resp_data <= 0; + + // Handle register writes + if (s_ctrlport_req_wr) begin + case (s_ctrlport_req_addr) + REG_TX_IDLE_VALUE: begin + reg_idle_value <= s_ctrlport_req_data[SAMP_W-1:0]; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERROR_POLICY: begin + // Only allow valid configurations + case (s_ctrlport_req_data[TX_ERR_POLICY_LEN-1:0]) + TX_ERR_POLICY_PACKET : reg_policy <= TX_ERR_POLICY_PACKET; + TX_ERR_POLICY_BURST : reg_policy <= TX_ERR_POLICY_BURST; + default : reg_policy <= TX_ERR_POLICY_PACKET; + endcase + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_PORT: begin + reg_error_portid 
<= s_ctrlport_req_data[9:0]; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_REM_PORT: begin + reg_error_rem_portid <= s_ctrlport_req_data[9:0]; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_REM_EPID: begin + reg_error_rem_epid <= s_ctrlport_req_data[15:0]; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_ADDR: begin + reg_error_addr <= s_ctrlport_req_data[19:0]; + s_ctrlport_resp_ack <= 1; + end + endcase + end + + // Handle register reads + if (s_ctrlport_req_rd) begin + case (s_ctrlport_req_addr) + REG_TX_IDLE_VALUE: begin + s_ctrlport_resp_data[SAMP_W-1:0] <= reg_idle_value; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERROR_POLICY: begin + s_ctrlport_resp_data[TX_ERR_POLICY_LEN-1:0] <= reg_policy; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_PORT: begin + s_ctrlport_resp_data[9:0] <= reg_error_portid; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_REM_PORT: begin + s_ctrlport_resp_data[9:0] <= reg_error_rem_portid; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_REM_EPID: begin + s_ctrlport_resp_data[15:0] <= reg_error_rem_epid; + s_ctrlport_resp_ack <= 1; + end + REG_TX_ERR_ADDR: begin + s_ctrlport_resp_data[19:0] <= reg_error_addr; + s_ctrlport_resp_ack <= 1; + end + endcase + end + end + end + + + //--------------------------------------------------------------------------- + // Transmitter State Machine + //--------------------------------------------------------------------------- + + // FSM state values + localparam ST_IDLE = 0; + localparam ST_TIME_CHECK = 1; + localparam ST_TRANSMIT = 2; + localparam ST_POLICY_WAIT = 3; + + reg [1:0] state = ST_IDLE; + + reg sop = 1'b1; // Start of packet + + reg [ERR_TX_CODE_W-1:0] new_error_code; + reg [ 63:0] new_error_time; + reg new_error_valid = 1'b0; + + reg time_now, time_past; + + + always @(posedge radio_clk) begin + if (radio_rst) begin + state <= ST_IDLE; + sop <= 1'b1; + new_error_valid <= 1'b0; + end else begin + new_error_valid <= 1'b0; + + // Register time comparisons so they don't become the critical path 
+ time_now <= (radio_time == s_axis_ttimestamp); + time_past <= (radio_time > s_axis_ttimestamp); + + // Track if the next word will be the start of a packet (sop) + if (s_axis_tvalid && s_axis_tready) begin + sop <= s_axis_tlast; + end + + case (state) + ST_IDLE : begin + // Wait for a new packet to arrive and allow a cycle for the time + // comparisons to update. + if (s_axis_tvalid) begin + state <= ST_TIME_CHECK; + end + end + + ST_TIME_CHECK : begin + if (!s_axis_thas_time || time_now) begin + // We have a new packet without a timestamp, or a new packet + // whose time has arrived. + state <= ST_TRANSMIT; + end else if (time_past) begin + // We have a new packet with a timestamp, but the time has passed. + //synthesis translate_off + $display("WARNING: radio_tx_core: Late data error"); + //synthesis translate_on + new_error_code <= ERR_TX_LATE_DATA; + new_error_time <= radio_time; + new_error_valid <= 1'b1; + state <= ST_POLICY_WAIT; + end + end + + ST_TRANSMIT : begin + if (radio_tx_stb) begin + if (!s_axis_tvalid) begin + // The radio strobed for new data but we don't have any to give + //synthesis translate_off + $display("WARNING: radio_tx_core: Underrun error"); + //synthesis translate_on + new_error_code <= ERR_TX_UNDERRUN; + new_error_time <= radio_time; + new_error_valid <= 1'b1; + state <= ST_POLICY_WAIT; + end else if (s_axis_tlast && s_axis_teob) begin + // We're done with this burst of packets, so acknowledge EOB and + // go back to idle. + new_error_code <= ERR_TX_EOB_ACK; + new_error_time <= radio_time; + new_error_valid <= 1'b1; + state <= ST_IDLE; + end + end + end + + ST_POLICY_WAIT : begin + // If we came here from ST_TIME_CHECK or ST_TRANSMIT and we're in the + // middle of a packet then we just wait until we reach the end of the + // packet.
+ if (s_axis_tvalid && s_axis_tlast) begin + // We're either at the end of a packet or between packets + if (reg_policy == TX_ERR_POLICY_PACKET || + (reg_policy == TX_ERR_POLICY_BURST && s_axis_teob)) begin + state <= ST_IDLE; + end + + // If we came from ST_TRANSMIT and we happen to already be between + // packets (i.e., we underflowed while waiting for the next packet). + end else if (!s_axis_tvalid && sop) begin + if (reg_policy == TX_ERR_POLICY_PACKET) state <= ST_IDLE; + end + end + + default : state <= ST_IDLE; + endcase + end + end + + + // Output the current sample whenever we're transmitting and the sample is + // valid. Otherwise, output the idle value. + assign radio_tx_data = (s_axis_tvalid && state == ST_TRANSMIT) ? + s_axis_tdata : + {NSPC{reg_idle_value[SAMP_W-1:0]}}; + + // Read packet in the transmit state or dump it in the error state + assign s_axis_tready = (radio_tx_stb && (state == ST_TRANSMIT)) || + (state == ST_POLICY_WAIT); + + // Indicate whether Tx interface is actively transmitting + assign radio_tx_running = (state == ST_TRANSMIT); + + + //--------------------------------------------------------------------------- + // Error FIFO + //--------------------------------------------------------------------------- + // + // This FIFO queues up errors in case we get multiple errors in a row faster + // than they can be reported. If the FIFO fills then new errors will be + // ignored. 
+ // + //--------------------------------------------------------------------------- + + // Error information + wire [ERR_TX_CODE_W-1:0] next_error_code; + wire [ 63:0] next_error_time; + wire next_error_valid; + reg next_error_ready = 1'b0; + + wire new_error_ready; + + axi_fifo_short #( + .WIDTH (64 + ERR_TX_CODE_W) + ) error_fifo ( + .clk (radio_clk), + .reset (radio_rst), + .clear (1'b0), + .i_tdata ({new_error_time, new_error_code}), + .i_tvalid (new_error_valid & new_error_ready), // Mask with ready to prevent FIFO corruption + .i_tready (new_error_ready), + .o_tdata ({next_error_time, next_error_code}), + .o_tvalid (next_error_valid), + .o_tready (next_error_ready), + .space (), + .occupied () + ); + + //synthesis translate_off + // Output a message if the error FIFO overflows + always @(posedge radio_clk) begin + if (new_error_valid && !new_error_ready) begin + $display("WARNING: Tx error report dropped!"); + end + end + //synthesis translate_on + + + //--------------------------------------------------------------------------- + // Error Reporting State Machine + //--------------------------------------------------------------------------- + // + // This state machine reports errors that have been queued up in the error + // FIFO. 
+ // + //--------------------------------------------------------------------------- + + localparam ST_ERR_IDLE = 0; + localparam ST_ERR_CODE = 1; + + reg [0:0] err_state = ST_ERR_IDLE; + + // All ctrlport requests have a time + assign m_ctrlport_req_has_time = 1'b1; + + always @(posedge radio_clk) begin + if (radio_rst) begin + m_ctrlport_req_wr <= 1'b0; + err_state <= ST_ERR_IDLE; + next_error_ready <= 1'b0; + end else begin + m_ctrlport_req_wr <= 1'b0; + next_error_ready <= 1'b0; + + case (err_state) + ST_ERR_IDLE : begin + if (next_error_valid) begin + // Setup write of error code + m_ctrlport_req_wr <= 1'b1; + m_ctrlport_req_addr <= reg_error_addr; + m_ctrlport_req_data <= {{(32-ERR_TX_CODE_W){1'b0}}, next_error_code}; + m_ctrlport_req_time <= next_error_time; + next_error_ready <= 1'b1; + err_state <= ST_ERR_CODE; + end + end + + ST_ERR_CODE : begin + // Wait for write of error code and timestamp + if (m_ctrlport_resp_ack) begin + err_state <= ST_ERR_IDLE; + end + end + + default : err_state <= ST_ERR_IDLE; + endcase + end + end + + + // Directly connect the port ID, remote port ID, remote EPID since they are + // only used for error reporting. + assign m_ctrlport_req_portid = reg_error_portid; + assign m_ctrlport_req_rem_epid = reg_error_rem_epid; + assign m_ctrlport_req_rem_portid = reg_error_rem_portid; + + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio.v new file mode 100644 index 000000000..a97b141c0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio.v @@ -0,0 +1,546 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_radio +// +// Description: This is the top-level file for the RFNoC radio block. 
+// +// Parameters: +// +// THIS_PORTID : CTRL port ID to which this block is connected +// CHDR_W : CHDR AXI-Stream data bus width +// NIPC : Number of radio samples per radio clock cycle +// ITEM_W : Radio sample width +// NUM_PORTS : Number of radio channels (RX/TX pairs) +// MTU : Maximum transmission unit (i.e., maximum packet size) +// in CHDR words is 2**MTU. +// CTRL_FIFO_SIZE : Size of the Control Port slave FIFO. This affects the +// number of outstanding commands that can be pending. +// PERIPH_BASE_ADDR : CTRL port peripheral window base address +// PERIPH_ADDR_W : CTRL port peripheral address space = 2**PERIPH_ADDR_W +// + + +module rfnoc_block_radio #( + parameter THIS_PORTID = 0, + parameter CHDR_W = 64, + parameter NIPC = 1, + parameter ITEM_W = 32, + parameter NUM_PORTS = 2, + parameter MTU = 10, + parameter CTRL_FIFO_SIZE = 9, + parameter PERIPH_BASE_ADDR = 20'h80000, + parameter PERIPH_ADDR_W = 19 +) ( + //--------------------------------------------------------------------------- + // AXIS CHDR Port + //--------------------------------------------------------------------------- + + input wire rfnoc_chdr_clk, + + // CHDR inputs from framework + input wire [CHDR_W*NUM_PORTS-1:0] s_rfnoc_chdr_tdata, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast, + input wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid, + output wire [ NUM_PORTS-1:0] s_rfnoc_chdr_tready, + + // CHDR outputs to framework + output wire [CHDR_W*NUM_PORTS-1:0] m_rfnoc_chdr_tdata, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast, + output wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid, + input wire [ NUM_PORTS-1:0] m_rfnoc_chdr_tready, + + // Backend interface + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + + + //--------------------------------------------------------------------------- + // AXIS CTRL Port + //--------------------------------------------------------------------------- + + input wire rfnoc_ctrl_clk, + + // CTRL port requests from framework + 
input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + + // CTRL port requests to framework + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + + + //--------------------------------------------------------------------------- + // CTRL Port Peripheral Interface + //--------------------------------------------------------------------------- + + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + + + //--------------------------------------------------------------------------- + // Radio Interface + //--------------------------------------------------------------------------- + + input wire radio_clk, + + // Timekeeper interface + input wire [63:0] radio_time, + + // Radio Rx interface + input wire [(ITEM_W*NIPC)*NUM_PORTS-1:0] radio_rx_data, + input wire [ NUM_PORTS-1:0] radio_rx_stb, + output wire [ NUM_PORTS-1:0] radio_rx_running, + + // Radio Tx interface + output wire [(ITEM_W*NIPC)*NUM_PORTS-1:0] radio_tx_data, + input wire [ NUM_PORTS-1:0] radio_tx_stb, + output wire [ NUM_PORTS-1:0] radio_tx_running +); + + `include "rfnoc_block_radio_regs.vh" + `include "../../core/rfnoc_axis_ctrl_utils.vh" + + localparam NOC_ID = 32'h12AD1000; + localparam RADIO_W = NIPC*ITEM_W; + + + // Radio Tx data stream + wire [RADIO_W*NUM_PORTS-1:0] axis_tx_tdata; + wire [ NUM_PORTS-1:0] axis_tx_tlast; + wire [ NUM_PORTS-1:0] axis_tx_tvalid; + wire [ NUM_PORTS-1:0] axis_tx_tready; + wire [ 64*NUM_PORTS-1:0] axis_tx_ttimestamp; + wire [ NUM_PORTS-1:0] axis_tx_thas_time; 
+ wire [ NUM_PORTS-1:0] axis_tx_teob; + + // Radio Rx data stream + wire [RADIO_W*NUM_PORTS-1:0] axis_rx_tdata; + wire [ NUM_PORTS-1:0] axis_rx_tlast; + wire [ NUM_PORTS-1:0] axis_rx_tvalid; + wire [ NUM_PORTS-1:0] axis_rx_tready; + wire [ 64*NUM_PORTS-1:0] axis_rx_ttimestamp; + wire [ NUM_PORTS-1:0] axis_rx_thas_time; + wire [ NUM_PORTS-1:0] axis_rx_teob; + + // Control port signals used for register access (NoC shell masters user logic) + wire ctrlport_reg_req_wr; + wire ctrlport_reg_req_rd; + wire [19:0] ctrlport_reg_req_addr; + wire ctrlport_reg_has_time; + wire [63:0] ctrlport_reg_time; + wire [31:0] ctrlport_reg_req_data; + wire [31:0] ctrlport_reg_resp_data; + wire ctrlport_reg_resp_ack; + + // Control port signals used for error reporting (user logic masters to NoC shell) + wire ctrlport_err_req_wr; + wire [19:0] ctrlport_err_req_addr; + wire [ 9:0] ctrlport_err_req_portid; + wire [15:0] ctrlport_err_req_rem_epid; + wire [ 9:0] ctrlport_err_req_rem_portid; + wire [31:0] ctrlport_err_req_data; + wire ctrlport_err_req_has_time; + wire [63:0] ctrlport_err_req_time; + wire ctrlport_err_resp_ack; + + + //--------------------------------------------------------------------------- + // NoC Shell + //--------------------------------------------------------------------------- + + wire rfnoc_chdr_rst; + wire radio_rst; + + noc_shell_radio #( + .NOC_ID (NOC_ID), + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .CTRLPORT_SLV_EN (1), + .CTRLPORT_MST_EN (1), + .CTRL_FIFO_SIZE (CTRL_FIFO_SIZE), + .NUM_DATA_I (NUM_PORTS), + .NUM_DATA_O (NUM_PORTS), + .ITEM_W (ITEM_W), + .NIPC (NIPC), + .PYLD_FIFO_SIZE (MTU), + .MTU (MTU) + ) noc_shell_radio_i ( + .rfnoc_chdr_clk (rfnoc_chdr_clk), + .rfnoc_chdr_rst (rfnoc_chdr_rst), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk), + .rfnoc_ctrl_rst (), + .rfnoc_core_config (rfnoc_core_config), + .rfnoc_core_status (rfnoc_core_status), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast), + .s_rfnoc_chdr_tvalid 
(s_rfnoc_chdr_tvalid), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready), + .s_rfnoc_ctrl_tdata (s_rfnoc_ctrl_tdata), + .s_rfnoc_ctrl_tlast (s_rfnoc_ctrl_tlast), + .s_rfnoc_ctrl_tvalid (s_rfnoc_ctrl_tvalid), + .s_rfnoc_ctrl_tready (s_rfnoc_ctrl_tready), + .m_rfnoc_ctrl_tdata (m_rfnoc_ctrl_tdata), + .m_rfnoc_ctrl_tlast (m_rfnoc_ctrl_tlast), + .m_rfnoc_ctrl_tvalid (m_rfnoc_ctrl_tvalid), + .m_rfnoc_ctrl_tready (m_rfnoc_ctrl_tready), + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .m_ctrlport_req_wr (ctrlport_reg_req_wr), + .m_ctrlport_req_rd (ctrlport_reg_req_rd), + .m_ctrlport_req_addr (ctrlport_reg_req_addr), + .m_ctrlport_req_data (ctrlport_reg_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (ctrlport_reg_has_time), + .m_ctrlport_req_time (ctrlport_reg_time), + .m_ctrlport_resp_ack (ctrlport_reg_resp_ack), + .m_ctrlport_resp_status (AXIS_CTRL_STS_OKAY), + .m_ctrlport_resp_data (ctrlport_reg_resp_data), + .s_ctrlport_req_wr (ctrlport_err_req_wr), + .s_ctrlport_req_rd (1'b0), + .s_ctrlport_req_addr (ctrlport_err_req_addr), + .s_ctrlport_req_portid (ctrlport_err_req_portid), + .s_ctrlport_req_rem_epid (ctrlport_err_req_rem_epid), + .s_ctrlport_req_rem_portid (ctrlport_err_req_rem_portid), + .s_ctrlport_req_data (ctrlport_err_req_data), + .s_ctrlport_req_byte_en (4'hF), + .s_ctrlport_req_has_time (ctrlport_err_req_has_time), + .s_ctrlport_req_time (ctrlport_err_req_time), + .s_ctrlport_resp_ack (ctrlport_err_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .axis_data_clk (radio_clk), + .axis_data_rst (radio_rst), + .m_axis_tdata (axis_tx_tdata), + .m_axis_tkeep (), // Radio only transmits full words + .m_axis_tlast (axis_tx_tlast), + .m_axis_tvalid (axis_tx_tvalid), + .m_axis_tready (axis_tx_tready), + .m_axis_ttimestamp (axis_tx_ttimestamp), + 
.m_axis_thas_time (axis_tx_thas_time), + .m_axis_teov (), + .m_axis_teob (axis_tx_teob), + .s_axis_tdata (axis_rx_tdata), + .s_axis_tkeep ({NUM_PORTS*NIPC{1'b1}}), // Radio only receives full words + .s_axis_tlast (axis_rx_tlast), + .s_axis_tvalid (axis_rx_tvalid), + .s_axis_tready (axis_rx_tready), + .s_axis_ttimestamp (axis_rx_ttimestamp), + .s_axis_thas_time (axis_rx_thas_time), + .s_axis_teov ({NUM_PORTS{1'b0}}), + .s_axis_teob (axis_rx_teob) + ); + + // Cross the CHDR reset to the radio_clk domain + pulse_synchronizer #( + .MODE ("POSEDGE") + ) ctrl_rst_sync_i ( + .clk_a (rfnoc_chdr_clk), + .rst_a (1'b0), + .pulse_a (rfnoc_chdr_rst), + .busy_a (), + .clk_b (radio_clk), + .pulse_b (radio_rst) + ); + + + //--------------------------------------------------------------------------- + // Decode Control Port Addresses + //--------------------------------------------------------------------------- + // + // This block splits the NoC shell's single master control port interface + // into three masters, connected to the shared registers, radio cores, and + // the external CTRL port peripheral interface. The responses from each of + // these are merged into a single response and sent back to the NoC shell. 
+ // + //--------------------------------------------------------------------------- + + wire ctrlport_shared_req_wr; + wire ctrlport_shared_req_rd; + wire [19:0] ctrlport_shared_req_addr; + wire [31:0] ctrlport_shared_req_data; + wire [ 3:0] ctrlport_shared_req_byte_en; + wire ctrlport_shared_req_has_time; + wire [63:0] ctrlport_shared_req_time; + reg ctrlport_shared_resp_ack = 1'b0; + reg [31:0] ctrlport_shared_resp_data = 0; + + wire ctrlport_core_req_wr; + wire ctrlport_core_req_rd; + wire [19:0] ctrlport_core_req_addr; + wire [31:0] ctrlport_core_req_data; + wire [ 3:0] ctrlport_core_req_byte_en; + wire ctrlport_core_req_has_time; + wire [63:0] ctrlport_core_req_time; + wire ctrlport_core_resp_ack; + wire [31:0] ctrlport_core_resp_data; + + ctrlport_decoder_param #( + .NUM_SLAVES (3), + .PORT_BASE ({PERIPH_BASE_ADDR, RADIO_BASE_ADDR, SHARED_BASE_ADDR}), + .PORT_ADDR_W({PERIPH_ADDR_W, RADIO_ADDR_W + $clog2(NUM_PORTS), SHARED_ADDR_W}) + ) ctrlport_decoder_param_i ( + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .s_ctrlport_req_wr (ctrlport_reg_req_wr), + .s_ctrlport_req_rd (ctrlport_reg_req_rd), + .s_ctrlport_req_addr (ctrlport_reg_req_addr), + .s_ctrlport_req_data (ctrlport_reg_req_data), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (ctrlport_reg_has_time), + .s_ctrlport_req_time (ctrlport_reg_time), + .s_ctrlport_resp_ack (ctrlport_reg_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (ctrlport_reg_resp_data), + .m_ctrlport_req_wr ({m_ctrlport_req_wr, + ctrlport_core_req_wr, + ctrlport_shared_req_wr}), + .m_ctrlport_req_rd ({m_ctrlport_req_rd, + ctrlport_core_req_rd, + ctrlport_shared_req_rd}), + .m_ctrlport_req_addr ({m_ctrlport_req_addr, + ctrlport_core_req_addr, + ctrlport_shared_req_addr}), + .m_ctrlport_req_data ({m_ctrlport_req_data, + ctrlport_core_req_data, + ctrlport_shared_req_data}), + .m_ctrlport_req_byte_en ({m_ctrlport_req_byte_en, + ctrlport_core_req_byte_en, + ctrlport_shared_req_byte_en}), + 
.m_ctrlport_req_has_time ({m_ctrlport_req_has_time, + ctrlport_core_req_has_time, + ctrlport_shared_req_has_time}), + .m_ctrlport_req_time ({m_ctrlport_req_time, + ctrlport_core_req_time, + ctrlport_shared_req_time}), + .m_ctrlport_resp_ack ({m_ctrlport_resp_ack, + ctrlport_core_resp_ack, + ctrlport_shared_resp_ack}), + .m_ctrlport_resp_status ({m_ctrlport_resp_status, + 2'b00, + 2'b00}), + .m_ctrlport_resp_data ({m_ctrlport_resp_data, + ctrlport_core_resp_data, + ctrlport_shared_resp_data + }) + ); + + + //--------------------------------------------------------------------------- + // Split Radio Control Port Interfaces + //--------------------------------------------------------------------------- + + wire [ NUM_PORTS-1:0] ctrlport_radios_req_wr; + wire [ NUM_PORTS-1:0] ctrlport_radios_req_rd; + wire [20*NUM_PORTS-1:0] ctrlport_radios_req_addr; + wire [32*NUM_PORTS-1:0] ctrlport_radios_req_data; + wire [ NUM_PORTS-1:0] ctrlport_radios_resp_ack; + wire [32*NUM_PORTS-1:0] ctrlport_radios_resp_data; + + ctrlport_decoder #( + .NUM_SLAVES (NUM_PORTS), + .BASE_ADDR (0), + .SLAVE_ADDR_W (RADIO_ADDR_W) + ) ctrlport_decoder_i ( + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .s_ctrlport_req_wr (ctrlport_core_req_wr), + .s_ctrlport_req_rd (ctrlport_core_req_rd), + .s_ctrlport_req_addr (ctrlport_core_req_addr), + .s_ctrlport_req_data (ctrlport_core_req_data), + .s_ctrlport_req_byte_en (4'b0), + .s_ctrlport_req_has_time (1'b0), + .s_ctrlport_req_time (64'b0), + .s_ctrlport_resp_ack (ctrlport_core_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (ctrlport_core_resp_data), + .m_ctrlport_req_wr (ctrlport_radios_req_wr), + .m_ctrlport_req_rd (ctrlport_radios_req_rd), + .m_ctrlport_req_addr (ctrlport_radios_req_addr), + .m_ctrlport_req_data (ctrlport_radios_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (ctrlport_radios_resp_ack), + .m_ctrlport_resp_status ({NUM_PORTS{2'b00}}), 
+ .m_ctrlport_resp_data (ctrlport_radios_resp_data) + ); + + + //--------------------------------------------------------------------------- + // Merge Control Port Interfaces + //--------------------------------------------------------------------------- + // + // This block merges the master control port interfaces of all radio_cores + // into a single master for the NoC shell. + // + //--------------------------------------------------------------------------- + + wire [ NUM_PORTS-1:0] ctrlport_err_radio_req_wr; + wire [20*NUM_PORTS-1:0] ctrlport_err_radio_req_addr; + wire [10*NUM_PORTS-1:0] ctrlport_err_radio_req_portid; + wire [16*NUM_PORTS-1:0] ctrlport_err_radio_req_rem_epid; + wire [10*NUM_PORTS-1:0] ctrlport_err_radio_req_rem_portid; + wire [32*NUM_PORTS-1:0] ctrlport_err_radio_req_data; + wire [ NUM_PORTS-1:0] ctrlport_err_radio_req_has_time; + wire [64*NUM_PORTS-1:0] ctrlport_err_radio_req_time; + wire [ NUM_PORTS-1:0] ctrlport_err_radio_resp_ack; + + ctrlport_combiner #( + .NUM_MASTERS (NUM_PORTS), + .PRIORITY (0) + ) ctrlport_combiner_i ( + .ctrlport_clk (radio_clk), + .ctrlport_rst (radio_rst), + .s_ctrlport_req_wr (ctrlport_err_radio_req_wr), + .s_ctrlport_req_rd ({NUM_PORTS{1'b0}}), + .s_ctrlport_req_addr (ctrlport_err_radio_req_addr), + .s_ctrlport_req_portid (ctrlport_err_radio_req_portid), + .s_ctrlport_req_rem_epid (ctrlport_err_radio_req_rem_epid), + .s_ctrlport_req_rem_portid (ctrlport_err_radio_req_rem_portid), + .s_ctrlport_req_data (ctrlport_err_radio_req_data), + .s_ctrlport_req_byte_en ({4*NUM_PORTS{1'b1}}), + .s_ctrlport_req_has_time (ctrlport_err_radio_req_has_time), + .s_ctrlport_req_time (ctrlport_err_radio_req_time), + .s_ctrlport_resp_ack (ctrlport_err_radio_resp_ack), + .s_ctrlport_resp_status (), + .s_ctrlport_resp_data (), + .m_ctrlport_req_wr (ctrlport_err_req_wr), + .m_ctrlport_req_rd (), + .m_ctrlport_req_addr (ctrlport_err_req_addr), + .m_ctrlport_req_portid (ctrlport_err_req_portid), + .m_ctrlport_req_rem_epid 
(ctrlport_err_req_rem_epid), + .m_ctrlport_req_rem_portid (ctrlport_err_req_rem_portid), + .m_ctrlport_req_data (ctrlport_err_req_data), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (ctrlport_err_req_has_time), + .m_ctrlport_req_time (ctrlport_err_req_time), + .m_ctrlport_resp_ack (ctrlport_err_resp_ack), + .m_ctrlport_resp_status (2'b0), + .m_ctrlport_resp_data (32'b0) + ); + + + //--------------------------------------------------------------------------- + // Shared Registers + //--------------------------------------------------------------------------- + // + // These registers are shared by all radio channels. + // + //--------------------------------------------------------------------------- + + localparam [15:0] compat_major = 16'd0; + localparam [15:0] compat_minor = 16'd0; + + always @(posedge radio_clk) begin + if (radio_rst) begin + ctrlport_shared_resp_ack <= 0; + ctrlport_shared_resp_data <= 0; + end else begin + // Default assignments + ctrlport_shared_resp_ack <= 0; + ctrlport_shared_resp_data <= 0; + + // Handle register reads + if (ctrlport_shared_req_rd) begin + case (ctrlport_shared_req_addr) + REG_COMPAT_NUM: begin + ctrlport_shared_resp_ack <= 1; + ctrlport_shared_resp_data <= { compat_major, compat_minor }; + end + endcase + end + end + end + + + //--------------------------------------------------------------------------- + // Radio Cores + //--------------------------------------------------------------------------- + // + // This generate block instantiates one radio core for each channel that is + // requested by NUM_PORTS. + // + //--------------------------------------------------------------------------- + + genvar i; + generate + for (i = 0; i < NUM_PORTS; i = i+1) begin : radio_core_gen + + // The radio core contains all the logic related to a single radio channel. 
+ radio_core #( + .SAMP_W (ITEM_W), + .NSPC (NIPC) + ) radio_core_i ( + .radio_clk (radio_clk), + .radio_rst (radio_rst), + + // Slave Control Port (Register Access) + .s_ctrlport_req_wr (ctrlport_radios_req_wr[i]), + .s_ctrlport_req_rd (ctrlport_radios_req_rd[i]), + .s_ctrlport_req_addr (ctrlport_radios_req_addr[i*20 +: 20]), + .s_ctrlport_req_data (ctrlport_radios_req_data[i*32 +: 32]), + .s_ctrlport_resp_ack (ctrlport_radios_resp_ack[i]), + .s_ctrlport_resp_data (ctrlport_radios_resp_data[i*32 +: 32]), + + // Master Control Port (Error Reporting) + .m_ctrlport_req_wr (ctrlport_err_radio_req_wr[i]), + .m_ctrlport_req_addr (ctrlport_err_radio_req_addr[i*20 +: 20]), + .m_ctrlport_req_portid (ctrlport_err_radio_req_portid[i*10 +: 10]), + .m_ctrlport_req_rem_epid (ctrlport_err_radio_req_rem_epid[i*16 +: 16]), + .m_ctrlport_req_rem_portid (ctrlport_err_radio_req_rem_portid[i*10 +: 10]), + .m_ctrlport_req_data (ctrlport_err_radio_req_data[i*32 +: 32]), + .m_ctrlport_req_has_time (ctrlport_err_radio_req_has_time[i]), + .m_ctrlport_req_time (ctrlport_err_radio_req_time[i*64 +: 64]), + .m_ctrlport_resp_ack (ctrlport_err_radio_resp_ack[i]), + + // Tx Data Stream + .s_axis_tdata (axis_tx_tdata[RADIO_W*i +: RADIO_W]), + .s_axis_tlast (axis_tx_tlast[i]), + .s_axis_tvalid (axis_tx_tvalid[i]), + .s_axis_tready (axis_tx_tready[i]), + // Sideband Info + .s_axis_ttimestamp (axis_tx_ttimestamp[i*64 +: 64]), + .s_axis_thas_time (axis_tx_thas_time[i]), + .s_axis_teob (axis_tx_teob[i]), + + // Rx Data Stream + .m_axis_tdata (axis_rx_tdata[RADIO_W*i +: RADIO_W]), + .m_axis_tlast (axis_rx_tlast[i]), + .m_axis_tvalid (axis_rx_tvalid[i]), + .m_axis_tready (axis_rx_tready[i]), + // Sideband Info + .m_axis_ttimestamp (axis_rx_ttimestamp[i*64 +: 64]), + .m_axis_thas_time (axis_rx_thas_time[i]), + .m_axis_teob (axis_rx_teob[i]), + + // Radio Data + .radio_time (radio_time), + .radio_rx_data (radio_rx_data[(RADIO_W)*i +: (RADIO_W)]), + .radio_rx_stb (radio_rx_stb[i]), + .radio_rx_running 
(radio_rx_running[i]), + .radio_tx_data (radio_tx_data[(RADIO_W)*i +: (RADIO_W)]), + .radio_tx_stb (radio_tx_stb[i]), + .radio_tx_running (radio_tx_running[i]) + ); + end + endgenerate +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_all_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_all_tb.sv new file mode 100644 index 000000000..ea99692bb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_all_tb.sv @@ -0,0 +1,68 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_radio_all_tb +// +// Description: This is the testbench for rfnoc_block_radio that instantiates +// several variations of rfnoc_block_radio_tb to test different configurations. +// + + +module rfnoc_block_radio_all_tb; + + timeunit 1ns; + timeprecision 1ps; + + import PkgTestExec::*; + + + //--------------------------------------------------------------------------- + // Test Definitions + //--------------------------------------------------------------------------- + + typedef struct { + int CHDR_W; + int ITEM_W; + int NIPC; + int NUM_PORTS; + int STALL_PROB; + int STB_PROB; + bit TEST_REGS; + } test_config_t; + + localparam NUM_TESTS = 9; + + localparam test_config_t test[NUM_TESTS] = '{ + '{CHDR_W: 64, ITEM_W: 16, NIPC: 1, NUM_PORTS: 3, STALL_PROB: 10, STB_PROB: 100, TEST_REGS: 1 }, + '{CHDR_W: 64, ITEM_W: 16, NIPC: 1, NUM_PORTS: 2, STALL_PROB: 25, STB_PROB: 80, TEST_REGS: 1 }, + '{CHDR_W: 64, ITEM_W: 16, NIPC: 2, NUM_PORTS: 1, STALL_PROB: 25, STB_PROB: 80, TEST_REGS: 0 }, + '{CHDR_W: 64, ITEM_W: 32, NIPC: 1, NUM_PORTS: 1, STALL_PROB: 25, STB_PROB: 80, TEST_REGS: 0 }, + '{CHDR_W: 64, ITEM_W: 32, NIPC: 2, NUM_PORTS: 1, STALL_PROB: 10, STB_PROB: 80, TEST_REGS: 0 }, + '{CHDR_W: 128, ITEM_W: 32, NIPC: 1, NUM_PORTS: 3, STALL_PROB: 10, STB_PROB: 100, TEST_REGS: 1 }, + '{CHDR_W: 128, ITEM_W: 32, NIPC: 1, NUM_PORTS: 
2, STALL_PROB: 25, STB_PROB: 80, TEST_REGS: 0 }, + '{CHDR_W: 128, ITEM_W: 32, NIPC: 2, NUM_PORTS: 1, STALL_PROB: 25, STB_PROB: 80, TEST_REGS: 0 }, + '{CHDR_W: 128, ITEM_W: 32, NIPC: 4, NUM_PORTS: 1, STALL_PROB: 10, STB_PROB: 80, TEST_REGS: 0 } + }; + + + //--------------------------------------------------------------------------- + // DUT Instances + //--------------------------------------------------------------------------- + + genvar i; + for (i = 0; i < NUM_TESTS; i++) begin : gen_test_config + rfnoc_block_radio_tb #( + .CHDR_W (test[i].CHDR_W ), + .ITEM_W (test[i].ITEM_W ), + .NIPC (test[i].NIPC ), + .NUM_PORTS (test[i].NUM_PORTS ), + .STALL_PROB (test[i].STALL_PROB), + .STB_PROB (test[i].STB_PROB ), + .TEST_REGS (test[i].TEST_REGS ) + ) rfnoc_block_radio_tb_i (); + end : gen_test_config + + +endmodule : rfnoc_block_radio_all_tb diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_regs.vh b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_regs.vh new file mode 100644 index 000000000..41f9a144e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_regs.vh @@ -0,0 +1,125 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_radio_regs (Header) +// +// Description: Header file for RFNoC radio functionality. This includes +// register offsets, bitfields and constants for the radio components. 
+// + + +//----------------------------------------------------------------------------- +// Shared Register Offsets (One Set Per Radio NoC Block) +//----------------------------------------------------------------------------- + +localparam SHARED_BASE_ADDR = 20'h00; // Base address for shared radio registers +localparam SHARED_ADDR_W = 4; // Address space size for shared registers + +localparam REG_COMPAT_NUM = 'h00; // Compatibility number register offset + + +//----------------------------------------------------------------------------- +// Radio Core Register Offsets (One Set Per Radio Port) +//----------------------------------------------------------------------------- +// +// These registers are replicated depending on the number of radio channels +// requested. They start at RADIO_BASE_ADDR and repeat every 2**RADIO_ADDR_W +// bytes. +// +// WARNING: All registers larger than a single 32-bit word must be read and +// written least significant word first to guarantee coherency. +// +//----------------------------------------------------------------------------- + +localparam RADIO_BASE_ADDR = 20'h1000; // Base address of first radio. Choose a + // nice big power of 2 so we can just pass + // the lower bits to the radio cores. 
+localparam RADIO_ADDR_W = 7; // Address width per radio (each radio occupies 2**RADIO_ADDR_W bytes) + +// General Radio Registers +localparam REG_LOOPBACK_EN = 'h00; // Loopback enable (connect Tx output to Rx input) +localparam REG_RADIO_WIDTH = 'h04; // Upper 16 bits is sample width, lower 16 bits is NSPC + +// RX Control Registers +localparam REG_RX_STATUS = 'h10; // Status of Rx radio +localparam REG_RX_CMD = 'h14; // The next radio command to execute +localparam REG_RX_CMD_NUM_WORDS_LO = 'h18; // Number of radio words for the next command (low word) +localparam REG_RX_CMD_NUM_WORDS_HI = 'h1C; // Number of radio words for the next command (high word) +localparam REG_RX_CMD_TIME_LO = 'h20; // Time for the next command (low word) +localparam REG_RX_CMD_TIME_HI = 'h24; // Time for the next command (high word) +localparam REG_RX_MAX_WORDS_PER_PKT = 'h28; // Maximum packet length to build from Rx data +localparam REG_RX_ERR_PORT = 'h2C; // Port ID for error reporting +localparam REG_RX_ERR_REM_PORT = 'h30; // Remote port ID for error reporting +localparam REG_RX_ERR_REM_EPID = 'h34; // Remote EPID (endpoint ID) for error reporting +localparam REG_RX_ERR_ADDR = 'h38; // Offset to write error code to +localparam REG_RX_DATA = 'h3C; // Read the current Rx output of the radio +localparam REG_RX_HAS_TIME = 'h70; // Controls whether or not a channel has timestamps + +// TX Control Registers +localparam REG_TX_IDLE_VALUE = 'h40; // Value to output when transmitter is idle +localparam REG_TX_ERROR_POLICY = 'h44; // Tx error policy +localparam REG_TX_ERR_PORT = 'h48; // Port ID for error reporting +localparam REG_TX_ERR_REM_PORT = 'h4C; // Remote port ID for error reporting +localparam REG_TX_ERR_REM_EPID = 'h50; // Remote EPID (endpoint ID) for error reporting +localparam REG_TX_ERR_ADDR = 'h54; // Offset to write error code to + + +//----------------------------------------------------------------------------- +// Register Bit Fields 
+//----------------------------------------------------------------------------- + +// REG_RX_CMD bit fields +localparam RX_CMD_POS = 0; // Location of the command bit field +localparam RX_CMD_LEN = 2; // Bit length of the command bit field +localparam RX_CMD_TIMED_POS = 31; // Location of the bit indicating if this is + // a timed command or not. + +// REG_RX_CMD_NUM_WORDS_HI/LO length field +localparam RX_CMD_NUM_WORDS_LEN = 48; // Number of bits that are used in the 64-bit + // NUM_WORDS register (must be in range [33:64]). + +// REG_RX_STATUS bit fields +localparam CMD_FIFO_SPACE_POS = 0; // Location of the command FIFO space field +localparam CMD_FIFO_SPACE_LEN = 6; // Length of the FIFO_SPACE field +localparam CMD_FIFO_SPACE_MAX = 32; // Size of command FIFO + +// REG_TX_ERROR_POLICY bit fields +localparam TX_ERR_POLICY_LEN = 2; // Length of error policy bit field + + +//----------------------------------------------------------------------------- +// Rx Radio Commands +//----------------------------------------------------------------------------- + +localparam [RX_CMD_LEN-1:0] RX_CMD_STOP = 0; // Stop acquiring at end of next packet +localparam [RX_CMD_LEN-1:0] RX_CMD_FINITE = 1; // Acquire NUM_WORDS then stop +localparam [RX_CMD_LEN-1:0] RX_CMD_CONTINUOUS = 2; // Acquire until stopped + + +//----------------------------------------------------------------------------- +// Tx Error Policies +//----------------------------------------------------------------------------- + +localparam TX_ERR_POLICY_PACKET = 1; // Wait for end of packet after error +localparam TX_ERR_POLICY_BURST = 2; // Wait for end of burst after error + + +//----------------------------------------------------------------------------- +// Error Codes +//----------------------------------------------------------------------------- + +// Rx Error Codes +localparam ERR_RX_CODE_W = 2; // Bit width of error code values +// +localparam ERR_RX_LATE_CMD = 1; // Late command (arrived after 
indicated time) +localparam ERR_RX_OVERRUN = 2; // FIFO overflow + + +// Tx Error Codes +localparam ERR_TX_CODE_W = 2; // Bit width of error code values +// +localparam ERR_TX_UNDERRUN = 1; // Data underflow (data not available when needed) +localparam ERR_TX_LATE_DATA = 2; // Late data (arrived after indicated time) +localparam ERR_TX_EOB_ACK = 3; // Acknowledge end-of-burst (this is not an error) diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_tb.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_tb.sv new file mode 100644 index 000000000..706e0f185 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rfnoc_block_radio_tb.sv @@ -0,0 +1,1382 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_block_radio_tb +// +// Description: This is the testbench for rfnoc_block_radio. +// + + +module rfnoc_block_radio_tb #( + parameter int CHDR_W = 128, // CHDR bus width + parameter int ITEM_W = 32, // Sample width + parameter int NIPC = 2, // Number of samples per radio clock cycle + parameter int NUM_PORTS = 2, // Number of radio channels + parameter int STALL_PROB = 25, // Probability of AXI BFM stall + parameter int STB_PROB = 80, // Probability of radio STB asserting + parameter bit TEST_REGS = 1 // Do register tests +); + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgRfnocBlockCtrlBfm::*; + import PkgAxisCtrlBfm::*; + import PkgChdrBfm::*; + import PkgRfnocItemUtils::*; + + // Pull in radio register offsets and constants + `include "rfnoc_block_radio_regs.vh" + + + // Simulation Parameters + localparam logic [ 9:0] THIS_PORTID = 10'h17; + localparam logic [15:0] THIS_EPID = 16'hDEAD; + localparam int MTU = 8; + localparam int RADIO_W = NIPC * ITEM_W; // Radio word size + localparam int SPP = 64; // Samples 
per packet + localparam int WPP = SPP*ITEM_W/RADIO_W; // Radio words per packet + localparam int CHDR_CLK_PER = 5; // rfnoc_chdr_clk period in ns + localparam int CTRL_CLK_PER = 25; // rfnoc_ctrl_clk period in ns + localparam int RADIO_CLK_PER = 10; // radio_clk period in ns + + // Amount of time to wait for a packet to be fully acquired + localparam realtime MAX_PKT_WAIT = 4*WPP*(RADIO_CLK_PER+CTRL_CLK_PER)*1ns; + + // Error reporting values to use + localparam bit [ 9:0] TX_ERR_DST_PORT = 10'h2B5; + localparam bit [ 9:0] TX_ERR_REM_DST_PORT = 10'h14C; + localparam bit [15:0] TX_ERR_REM_DST_EPID = 16'hA18E; + localparam bit [19:0] TX_ERR_ADDRESS = 20'hA31D3; + + + + //--------------------------------------------------------------------------- + // Clocks and Resets + //--------------------------------------------------------------------------- + + bit rfnoc_chdr_clk; + bit rfnoc_ctrl_clk; + bit radio_clk; + + // Don't start the clocks automatically (AUTOSTART=0), since we expect + // multiple instances of this testbench to run in sequence. They will be + // started before the first test. 
+ sim_clock_gen #(.PERIOD(CHDR_CLK_PER), .AUTOSTART(0)) + rfnoc_chdr_clk_gen (.clk(rfnoc_chdr_clk), .rst()); + sim_clock_gen #(.PERIOD(CTRL_CLK_PER), .AUTOSTART(0)) + rfnoc_ctrl_clk_gen (.clk(rfnoc_ctrl_clk), .rst()); + sim_clock_gen #(.PERIOD(RADIO_CLK_PER), .AUTOSTART(0)) + radio_clk_gen (.clk(radio_clk), .rst()); + + + + //--------------------------------------------------------------------------- + // Bus Functional Models + //--------------------------------------------------------------------------- + + // Connections to DUT as interfaces: + RfnocBackendIf backend (rfnoc_chdr_clk, rfnoc_ctrl_clk); + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, 1'b0); + AxiStreamIf #(CHDR_W) m_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + AxiStreamIf #(CHDR_W) s_chdr [NUM_PORTS] (rfnoc_chdr_clk, 1'b0); + + // Bus functional model for a software block controller + RfnocBlockCtrlBfm #(.CHDR_W(CHDR_W)) blk_ctrl; + + + + //--------------------------------------------------------------------------- + // Radio Data Model + //--------------------------------------------------------------------------- + + bit [NUM_PORTS*RADIO_W-1:0] radio_rx_data; + bit [ NUM_PORTS-1:0] radio_rx_stb; + + bit [63:0] radio_time; + bit radio_pps; + + // Radio data generation + sim_radio_gen #( + .NSPC (NIPC), + .SAMP_W (ITEM_W), + .NUM_CHANNELS (NUM_PORTS), + .STB_PROB (STB_PROB), + .INCREMENT (NIPC), + .PPS_PERIOD (NIPC * 250) + ) radio_gen ( + .radio_clk (radio_clk), + .radio_rst (1'b0), + .radio_rx_data (radio_rx_data), + .radio_rx_stb (radio_rx_stb), + .radio_time (radio_time), + .radio_pps (radio_pps) + ); + + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic [NUM_PORTS-1:0] radio_rx_running; + + logic [NUM_PORTS*RADIO_W-1:0] radio_tx_data; + logic [ NUM_PORTS-1:0] radio_tx_stb; + logic [ NUM_PORTS-1:0] radio_tx_running; + + 
logic [NUM_PORTS*CHDR_W-1:0] s_rfnoc_chdr_tdata_flat; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tlast_flat; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tvalid_flat; + logic [ NUM_PORTS-1:0] s_rfnoc_chdr_tready_flat; + + logic [NUM_PORTS*CHDR_W-1:0] m_rfnoc_chdr_tdata_flat; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tlast_flat; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tvalid_flat; + logic [ NUM_PORTS-1:0] m_rfnoc_chdr_tready_flat; + + semaphore port_sem = new(0); + + // Use the same strobe for both Rx and Tx + assign radio_tx_stb = radio_rx_stb; + + + // Flatten the data stream arrays into concatenated vectors + genvar i; + for (i = 0; i < NUM_PORTS; i++) begin : gen_radio_connections + assign s_rfnoc_chdr_tdata_flat[CHDR_W*i+:CHDR_W] = m_chdr[i].tdata; + assign s_rfnoc_chdr_tlast_flat[i] = m_chdr[i].tlast; + assign s_rfnoc_chdr_tvalid_flat[i] = m_chdr[i].tvalid; + assign m_chdr[i].tready = s_rfnoc_chdr_tready_flat[i]; + + assign s_chdr[i].tdata = m_rfnoc_chdr_tdata_flat[CHDR_W*i+:CHDR_W]; + assign s_chdr[i].tlast = m_rfnoc_chdr_tlast_flat[i]; + assign s_chdr[i].tvalid = m_rfnoc_chdr_tvalid_flat[i]; + assign m_rfnoc_chdr_tready_flat[i] = s_chdr[i].tready; + + // Connect each interface to the BFM. This is done in a generate block + // since the interface indices must be constant in SystemVerilog :( + initial begin + // Get the port number (plus 1) from the semaphore. This will block until + // the semaphore is incremented to this port number (plus 1). 
+ port_sem.get(i+1); + // Connect the master and slave interfaces to the BFM + void'(blk_ctrl.add_master_data_port(m_chdr[i])); + void'(blk_ctrl.add_slave_data_port(s_chdr[i])); + // Put the port number to communicate that we're done + port_sem.put(i+1); + end + end + + + rfnoc_block_radio #( + .THIS_PORTID (THIS_PORTID), + .CHDR_W (CHDR_W), + .NIPC (NIPC), + .ITEM_W (ITEM_W), + .NUM_PORTS (NUM_PORTS), + .MTU (MTU) + ) rfnoc_block_radio_i ( + .rfnoc_chdr_clk (backend.chdr_clk), + .s_rfnoc_chdr_tdata (s_rfnoc_chdr_tdata_flat), + .s_rfnoc_chdr_tlast (s_rfnoc_chdr_tlast_flat), + .s_rfnoc_chdr_tvalid (s_rfnoc_chdr_tvalid_flat), + .s_rfnoc_chdr_tready (s_rfnoc_chdr_tready_flat), + .m_rfnoc_chdr_tdata (m_rfnoc_chdr_tdata_flat), + .m_rfnoc_chdr_tlast (m_rfnoc_chdr_tlast_flat), + .m_rfnoc_chdr_tvalid (m_rfnoc_chdr_tvalid_flat), + .m_rfnoc_chdr_tready (m_rfnoc_chdr_tready_flat), + .rfnoc_core_config (backend.cfg), + .rfnoc_core_status (backend.sts), + .rfnoc_ctrl_clk (backend.ctrl_clk), + .s_rfnoc_ctrl_tdata (m_ctrl.tdata), + .s_rfnoc_ctrl_tlast (m_ctrl.tlast), + .s_rfnoc_ctrl_tvalid (m_ctrl.tvalid), + .s_rfnoc_ctrl_tready (m_ctrl.tready), + .m_rfnoc_ctrl_tdata (s_ctrl.tdata), + .m_rfnoc_ctrl_tlast (s_ctrl.tlast), + .m_rfnoc_ctrl_tvalid (s_ctrl.tvalid), + .m_rfnoc_ctrl_tready (s_ctrl.tready), + .m_ctrlport_req_wr (), + .m_ctrlport_req_rd (), + .m_ctrlport_req_addr (), + .m_ctrlport_req_data (), + .m_ctrlport_req_byte_en (), + .m_ctrlport_req_has_time (), + .m_ctrlport_req_time (), + .m_ctrlport_resp_ack (1'b0), + .m_ctrlport_resp_status (2'b0), + .m_ctrlport_resp_data (32'b0), + .radio_clk (radio_clk), + .radio_time (radio_time), + .radio_rx_data (radio_rx_data), + .radio_rx_stb (radio_rx_stb), + .radio_rx_running (radio_rx_running), + .radio_tx_data (radio_tx_data), + .radio_tx_stb (radio_tx_stb), + .radio_tx_running (radio_tx_running) + ); + + + + //--------------------------------------------------------------------------- + // Helper Tasks + 
//--------------------------------------------------------------------------- + + // Read a 32-bit register at offset "addr" from shared radio registers + task automatic read_shared(logic [19:0] addr, output logic [31:0] data); + addr = addr + SHARED_BASE_ADDR; + blk_ctrl.reg_read(addr, data); + endtask : read_shared + + // Write a 32-bit register at offset "addr" in shared radio registers + task automatic write_shared(logic [19:0] addr, logic [31:0] data); + addr = addr + SHARED_BASE_ADDR; + blk_ctrl.reg_write(addr, data); + endtask : write_shared + + // Read a 32-bit register at offset "addr" from radio "radio_num" + task automatic read_radio(int radio_num, logic [19:0] addr, output logic [31:0] data); + addr = addr + RADIO_BASE_ADDR + (radio_num * 2**RADIO_ADDR_W); + blk_ctrl.reg_read(addr, data); + endtask : read_radio + + // Read a 64-bit register at offset "addr" from radio "radio_num" + task automatic read_radio_64(int radio_num, logic [19:0] addr, output logic [63:0] data); + addr = addr + RADIO_BASE_ADDR + (radio_num * 2**RADIO_ADDR_W); + blk_ctrl.reg_read(addr, data[31:0]); + blk_ctrl.reg_read(addr+4, data[63:32]); + endtask : read_radio_64 + + // Write a 32-bit register at offset "addr" in radio "radio_num" + task automatic write_radio(int radio_num, logic [19:0] addr, logic [31:0] data); + addr = addr + RADIO_BASE_ADDR + (radio_num * 2**RADIO_ADDR_W); + blk_ctrl.reg_write(addr, data); + endtask : write_radio + + // Write a 64-bit register at offset "addr" in radio "radio_num" + task automatic write_radio_64(int radio_num, logic [19:0] addr, logic [63:0] data); + addr = addr + RADIO_BASE_ADDR + (radio_num * 2**RADIO_ADDR_W); + blk_ctrl.reg_write(addr, data[31:0]); + blk_ctrl.reg_write(addr+4, data[63:32]); + endtask : write_radio_64 + + + // Start an Rx acquisition + task automatic start_rx ( + int radio_num, // Radio channel to use + bit [63:0] num_words = 0 // Number of radio words + ); + logic [31:0] cmd; + + if (num_words == 0) begin + // Do a 
continuous acquisition + $display("Radio %0d: Start RX, continuous receive", radio_num); + cmd = RX_CMD_CONTINUOUS; + end else begin + // Do a finite acquisition (num samps and done) + $display("Radio %0d: Start RX, receive %0d words", radio_num, num_words); + write_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, num_words); + cmd = RX_CMD_FINITE; + end + + // Write command to radio + write_radio(radio_num, REG_RX_CMD, cmd); + endtask : start_rx + + + // Start an Rx acquisition at a specific time + task automatic start_rx_timed ( + int radio_num, // Radio channel to use + bit [63:0] num_words = 0, // Number of radio words + bit [63:0] start_time + ); + logic [31:0] cmd; + + if (num_words == 0) begin + // Do a continuous acquisition + $display("Radio %0d: Start RX, continuous receive (timed)", radio_num); + cmd = RX_CMD_CONTINUOUS; + end else begin + // Do a finite acquisition (num samps and done) + $display("Radio %0d: Start RX, receive %0d words (timed)", radio_num, num_words); + write_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, num_words); + cmd = RX_CMD_FINITE; + end + + // Mark that this is a timed command + cmd[RX_CMD_TIMED_POS] = 1'b1; + + // Set start time for command + write_radio_64(radio_num, REG_RX_CMD_TIME_LO, start_time); + + // Write command to radio + write_radio(radio_num, REG_RX_CMD, cmd); + endtask : start_rx_timed + + + // Send the Rx stop command to the indicated radio channel + task automatic stop_rx(int radio_num); + $display("Radio %0d: Stop RX", radio_num); + write_radio(radio_num, REG_RX_CMD, RX_CMD_STOP); + endtask : stop_rx + + + // Receive num_words from the indicated radio channel and verify that it's + // sequential and contiguous data aligned on packet boundaries. 
+ task automatic check_rx( + int radio_num, // Radio to receive from and check + int num_words // Number of radio words to expect + ); + int sample_count; // Counter to track number of samples checked + bit [ITEM_W-1:0] sample_val; // Expected value of the next sample + chdr_word_t data[$]; // Array of data for the received packet + int num_samples; // Number of samples to expect + int byte_length; // Number of data bytes in next packet + int expected_length; // Expected byte length of the next packet + int valid_words; // Number of valid chdr_word_t in next packet + + num_samples = num_words * NIPC; + + sample_count = 0; + while (sample_count < num_samples) begin + // Fetch the next packet + blk_ctrl.recv(radio_num, data, byte_length); + + // Take the first sample as a starting count for the remaining samples + if (sample_count == 0) begin + sample_val = data[0][ITEM_W-1:0]; + end + + // Calculate expected length in bytes + if (num_samples - sample_count >= SPP) begin + // Expecting a full packet + expected_length = SPP*ITEM_W/8; + end else begin + // Expecting partial packet + expected_length = (num_samples - sample_count) * ITEM_W/8; + end + + // Check that the length matches our expectation + `ASSERT_ERROR( + byte_length == expected_length, + "Received packet didn't have expected length." 
+ ); + + // Loop over the packet, one chdr_word_t at a time + valid_words = int'($ceil(real'(byte_length) / ($bits(chdr_word_t)/8))); + for (int i = 0; i < valid_words; i++) begin + // Check each sample of the next chdr_word_t value + for (int sub_sample = 0; sub_sample < $bits(chdr_word_t)/ITEM_W; sub_sample++) begin + chdr_word_t word; + word = data[i][ITEM_W*sub_sample +: ITEM_W]; // Work around Vivado 2018.3 issue + `ASSERT_ERROR( + word == sample_val, + $sformatf( + "Sample %0d (0x%X) didn't match expected value (0x%X)", + sample_count, data[i][ITEM_W*sub_sample +: ITEM_W], sample_val + ) + ); + sample_val++; + sample_count++; + + // Check if the word is only partially full + if (sample_count >= num_samples) break; + end + end + end + endtask : check_rx + + + // Send num_words to the indicated radio for transmission at the given time. + task automatic start_tx_timed ( + int radio_num, // Radio channel to transmit on + bit [63:0] num_words, // Number of radio words to transmit + logic [63:0] start_time = 'X, // Time at which to begin transmit + bit [ITEM_W-1:0] start_val = 1, // Initial sample value + bit eob = 1 // Set EOB flag at the end + ); + int sample_count; // Counter to track number of samples generated + bit [ITEM_W-1:0] sample_val; // Value of the next sample + chdr_word_t data[$]; // Array of data for the packet + int num_samples; // Number of samples to send + int byte_length; // Number of bytes for next packet + chdr_word_t chdr_word; // Next word to send to BFM + packet_info_t pkt_info = 0; // Flags/timestamp for next packet + + $display("Radio %0d: Start TX, send %0d words", radio_num, num_words); + + num_samples = num_words * NIPC; + + if (!$isunknown(start_time)) pkt_info.has_time = 1; + + sample_val = start_val; + sample_count = 0; + while (sample_count < num_samples) begin + // Calculate timestamp for this packet + if (pkt_info.has_time) begin + pkt_info.timestamp = start_time + sample_count; + end + + // Clear the payload + data = {}; + + // 
Loop until we've built up a packet + forever begin + // Generate the next word + for (int sub_sample = 0; sub_sample < $bits(chdr_word_t)/ITEM_W; sub_sample++) begin + chdr_word[ITEM_W*sub_sample +: ITEM_W] = sample_val; + sample_val++; + sample_count++; + end + + // Add next word to the queue + data.push_back(chdr_word); + + // Send the packet if we're at a packet boundary + if (sample_count % SPP == 0) begin + pkt_info.eob = (sample_count == num_samples && eob) ? 1 : 0; + byte_length = SPP * ITEM_W/8; + blk_ctrl.send(radio_num, data, byte_length, {}, pkt_info); + break; + end else if (sample_count >= num_samples) begin + pkt_info.eob = eob; + byte_length = (sample_count % SPP) * ITEM_W/8; + blk_ctrl.send(radio_num, data, byte_length, {}, pkt_info); + break; + end + end + end + endtask : start_tx_timed + + + // Send num_words to the indicated radio for transmission. + task automatic start_tx ( + int radio_num, // Radio channel to transmit on + bit [63:0] num_words, // Number of radio words to transmit + bit [ITEM_W-1:0] start_val = 1, // Initial sample value + bit eob = 1 // Set EOB flag at the end + ); + // Passing 'X tells the underlying BFM to not insert a timestamp + start_tx_timed(radio_num, num_words, 'X, start_val, eob); + endtask : start_tx + + + // Verify the output of a packet, expecting it at a specific time + task automatic check_tx_timed ( + int radio_num, // Radio channel to transmit on + bit [63:0] num_words, // Number of radio words to expect + logic [63:0] start_time = 'X, // Expected start time + bit [ITEM_W-1:0] start_val = 1 // Initial sample value + ); + int sample_val; // Expected value of next sample + + sample_val = start_val; + + // Wait for the packet to start + wait(radio_tx_data[radio_num*RADIO_W +: ITEM_W] == start_val); + + // Check the time + if (!$isunknown(start_time)) begin + `ASSERT_ERROR( + radio_time - start_time <= NIPC*2, + $sformatf("Packet transmitted at radio time 0x%0X but expected 0x%0X", radio_time, start_time) + ); + 
end + + // Verify output one word at a time + for (int word_count = 0; word_count < num_words; word_count++) begin + // Wait for the next radio word to be output + do begin + @(posedge radio_clk); + end while (radio_tx_stb[radio_num] == 0); + + // Check each sample of the radio word + for (int sub_sample = 0; sub_sample < NIPC; sub_sample++) begin + `ASSERT_ERROR( + radio_tx_data[radio_num*RADIO_W + ITEM_W*sub_sample +: ITEM_W] == sample_val, + "Radio output doesn't match expected value" + ); + sample_val++; + end + end + endtask : check_tx_timed + + + // Verify the output of a packet + task automatic check_tx ( + int radio_num, // Radio to transmit on + bit [63:0] num_words, // Number of radio words to expect + bit [ITEM_W-1:0] start_val = 1 // Initial sample value + ); + check_tx_timed(radio_num, num_words, 'X, start_val); + endtask : check_tx + + + // When we expect an error, this task will check that control packets were + // received and that they have the expected values. + task check_error (int error); + AxisCtrlPacket ctrl_packet; + chdr_word_t word; + + // Get error code + blk_ctrl.get_ctrl_bfm().get_ctrl(ctrl_packet); + word = ctrl_packet.data[0]; // Work around Vivado 2018.3 issue + `ASSERT_ERROR( + word == error && + ctrl_packet.op_word.op_code == CTRL_OP_WRITE && + ctrl_packet.op_word.address == TX_ERR_ADDRESS && + ctrl_packet.header.is_ack == 1'b0 && + ctrl_packet.header.has_time == 1'b1 && + ctrl_packet.header.num_data == 1 && + ctrl_packet.header.dst_port == TX_ERR_DST_PORT && + ctrl_packet.header.rem_dst_port == TX_ERR_REM_DST_PORT && + ctrl_packet.header.rem_dst_epid == TX_ERR_REM_DST_EPID, + "Unexpected error code response"); + + // Send acknowledgment + ctrl_packet.header = 0; + ctrl_packet.header.is_ack = 1; + blk_ctrl.get_ctrl_bfm().put_ctrl(ctrl_packet); + endtask : check_error + + + + //--------------------------------------------------------------------------- + // Test Procedures + 
//--------------------------------------------------------------------------- + + task automatic test_block_info(); + test.start_test("Verify Block Info", 2us); + + // Get static block info and validate it + `ASSERT_ERROR(blk_ctrl.get_noc_id() == rfnoc_block_radio_i.NOC_ID, "Incorrect noc_id Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_i() == NUM_PORTS, "Incorrect num_data_i Value"); + `ASSERT_ERROR(blk_ctrl.get_num_data_o() == NUM_PORTS, "Incorrect num_data_o Value"); + `ASSERT_ERROR(blk_ctrl.get_ctrl_fifosize() == rfnoc_block_radio_i.noc_shell_radio_i.CTRL_FIFO_SIZE, + "Incorrect ctrl_fifosize Value"); + `ASSERT_ERROR(blk_ctrl.get_mtu() == MTU, "Incorrect mtu Value"); + + test.end_test(); + endtask : test_block_info + + + + task automatic test_shared_registers(); + logic [31:0] val; + test.start_test("Shared Registers", 10us); + + // Compatibility number + read_shared(REG_COMPAT_NUM, val); + `ASSERT_ERROR( + val == { + rfnoc_block_radio_i.compat_major, + rfnoc_block_radio_i.compat_minor + }, + "REG_COMPAT_NUM didn't read correctly" + ); + test.end_test(); + endtask : test_shared_registers + + + + task automatic test_general_registers(int radio_num); + logic [31:0] val; + test.start_test("General Registers", 10us); + + // Test loopback enable register (read/write) + read_radio(radio_num, REG_LOOPBACK_EN, val); + `ASSERT_ERROR(val == 0, "Initial value of REG_LOOPBACK_EN is incorrect"); + write_radio(radio_num, REG_LOOPBACK_EN, 32'hFFFFFFFF); + read_radio(radio_num, REG_LOOPBACK_EN, val); + `ASSERT_ERROR(val == 1, "REG_LOOPBACK_EN didn't update correctly"); + write_radio(radio_num, REG_LOOPBACK_EN, 0); + + // Read ITEM_W and NIPC (read only) + read_radio(radio_num, REG_RADIO_WIDTH, val); + `ASSERT_ERROR(val[15:0] == NIPC, "Value of NIPC register is incorrect"); + `ASSERT_ERROR(val[31:16] == ITEM_W, "Value of ITEM_W register is incorrect"); + + test.end_test(); + endtask : test_general_registers + + + + task test_rx_registers(int radio_num); + logic [63:0] val, 
temp, expected; + localparam int num_words_len = RX_CMD_NUM_WORDS_LEN; + + test.start_test("Rx Registers", 50us); + + // REG_RX_STATUS (read only) + expected = CMD_FIFO_SPACE_MAX; + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR(val == expected, "REG_RX_STATUS not initially CMD_FIFO_SPACE_MAX"); + + // REG_RX_CMD (read/write). Test a bogus timed stop command just to check + // read/write of the register. + expected = 0; + expected[RX_CMD_POS +: RX_CMD_LEN] = RX_CMD_STOP; + expected[RX_CMD_TIMED_POS] = 1'b1; + write_radio(radio_num, REG_RX_CMD, expected); + read_radio(radio_num, REG_RX_CMD, val); + `ASSERT_ERROR(val == expected, "REG_RX_CMD didn't update correctly"); + + // REG_RX_CMD_NUM_WORDS (read/write) + read_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, val); + `ASSERT_ERROR(val == 0, "REG_RX_CMD_NUM_WORDS not initially 0"); + expected = 64'hFEDCBA9876543210; + write_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, expected); + read_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, val); + `ASSERT_ERROR( + val == expected[num_words_len-1:0], + "REG_RX_CMD_NUM_WORDS didn't update correctly" + ); + + // REG_RX_CMD_TIME (read/write) + read_radio_64(radio_num, REG_RX_CMD_TIME_LO, val); + `ASSERT_ERROR(val == 0, "REG_RX_CMD_TIME not initially 0"); + expected = 64'hBEADFEED0123F1FE; + write_radio_64(radio_num, REG_RX_CMD_TIME_LO, expected); + read_radio_64(radio_num, REG_RX_CMD_TIME_LO, val); + `ASSERT_ERROR(val == expected, "REG_RX_CMD_TIME didn't update correctly"); + + // REG_RX_MAX_WORDS_PER_PKT (read/write) + read_radio(radio_num, REG_RX_MAX_WORDS_PER_PKT, val); + `ASSERT_ERROR(val == 64, "REG_RX_MAX_WORDS_PER_PKT not initially 64"); + expected = 32'hABBEC001; + write_radio(radio_num, REG_RX_MAX_WORDS_PER_PKT, expected); + read_radio(radio_num, REG_RX_MAX_WORDS_PER_PKT, val); + `ASSERT_ERROR(val == expected, "REG_RX_MAX_WORDS_PER_PKT didn't update correctly"); + + // REG_RX_ERR_PORT (read/write) + read_radio(radio_num, REG_RX_ERR_PORT, val); + 
`ASSERT_ERROR(val == 0, "REG_RX_ERR_PORT not initially 0"); + expected = $urandom() & 32'h000001FF; + write_radio(radio_num, REG_RX_ERR_PORT, expected); + read_radio(radio_num, REG_RX_ERR_PORT, val); + `ASSERT_ERROR(val == expected, "REG_RX_ERR_PORT didn't update correctly"); + + // REG_RX_ERR_REM_PORT (read/write) + read_radio(radio_num, REG_RX_ERR_REM_PORT, val); + `ASSERT_ERROR(val == 0, "REG_RX_ERR_REM_PORT not initially 0"); + expected = $urandom() & 32'h000001FF; + write_radio(radio_num, REG_RX_ERR_REM_PORT, expected); + read_radio(radio_num, REG_RX_ERR_REM_PORT, val); + `ASSERT_ERROR(val == expected, "REG_RX_ERR_REM_PORT didn't update correctly"); + + // REG_RX_ERR_REM_EPID (read/write) + read_radio(radio_num, REG_RX_ERR_REM_EPID, val); + `ASSERT_ERROR(val == 0, "REG_RX_ERR_REM_EPID not initially 0"); + expected = $urandom() & 32'h0000FFFF; + write_radio(radio_num, REG_RX_ERR_REM_EPID, expected); + read_radio(radio_num, REG_RX_ERR_REM_EPID, val); + `ASSERT_ERROR(val == expected, "REG_RX_ERR_REM_EPID didn't update correctly"); + + // REG_RX_ERR_ADDR (read/write) + read_radio(radio_num, REG_RX_ERR_ADDR, val); + `ASSERT_ERROR(val == 0, "REG_RX_ERR_ADDR not initially 0"); + expected = $urandom() & 32'h000FFFFF; + write_radio(radio_num, REG_RX_ERR_ADDR, expected); + read_radio(radio_num, REG_RX_ERR_ADDR, val); + `ASSERT_ERROR(val == expected, "REG_RX_ERR_ADDR didn't update correctly"); + + // REG_RX_DATA (read-only). NOTE(review): the lower bound (temp) is sampled from radio_tx_data while the upper bound uses radio_rx_data -- confirm this is intended. 
+ temp = radio_tx_data[RADIO_W*radio_num +: RADIO_W]; + read_radio(radio_num, REG_RX_DATA, val); + `ASSERT_ERROR( + radio_rx_data[RADIO_W*radio_num +: RADIO_W] >= val && val >= temp, + "REG_RX_DATA wasn't in the expected range"); + read_radio(radio_num, REG_RX_DATA, temp); + `ASSERT_ERROR(temp != val, "REG_RX_DATA didn't update"); + + test.end_test(); + endtask : test_rx_registers + + + + task automatic test_tx_registers(int radio_num); + logic [31:0] val, expected; + + test.start_test("Tx Registers", 50us); + + // REG_TX_IDLE_VALUE (read/write) + 
read_radio(radio_num, REG_TX_IDLE_VALUE, val); + `ASSERT_ERROR(val == 0, "REG_TX_IDLE_VALUE not initially 0"); + expected = $urandom() & {ITEM_W{1'b1}}; + write_radio(radio_num, REG_TX_IDLE_VALUE, expected); + read_radio(radio_num, REG_TX_IDLE_VALUE, val); + `ASSERT_ERROR(val == expected, "REG_TX_IDLE_VALUE didn't update correctly"); + + // REG_TX_ERROR_POLICY (read/write) + read_radio(radio_num, REG_TX_ERROR_POLICY, val); + expected = TX_ERR_POLICY_PACKET; + `ASSERT_ERROR(val == expected, "REG_TX_ERROR_POLICY not initially 'PACKET'"); + expected = TX_ERR_POLICY_BURST; + write_radio(radio_num, REG_TX_ERROR_POLICY, expected); + read_radio(radio_num, REG_TX_ERROR_POLICY, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERROR_POLICY didn't update to 'BURST'"); + expected = TX_ERR_POLICY_PACKET; + write_radio(radio_num, REG_TX_ERROR_POLICY, 32'h03); // Try to set both bits! + read_radio(radio_num, REG_TX_ERROR_POLICY, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERROR_POLICY didn't revert to 'PACKET'"); + + // REG_TX_ERR_PORT (read/write) + read_radio(radio_num, REG_TX_ERR_PORT, val); + `ASSERT_ERROR(val == 0, "REG_TX_ERR_PORT not initially 0"); + expected = $urandom() & 32'h000001FF; + write_radio(radio_num, REG_TX_ERR_PORT, expected); + read_radio(radio_num, REG_TX_ERR_PORT, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERR_PORT didn't update correctly"); + + // REG_TX_ERR_REM_PORT (read/write) + read_radio(radio_num, REG_TX_ERR_REM_PORT, val); + `ASSERT_ERROR(val == 0, "REG_TX_ERR_REM_PORT not initially 0"); + expected = $urandom() & 32'h000001FF; + write_radio(radio_num, REG_TX_ERR_REM_PORT, expected); + read_radio(radio_num, REG_TX_ERR_REM_PORT, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERR_REM_PORT didn't update correctly"); + + // REG_TX_ERR_REM_EPID (read/write) + read_radio(radio_num, REG_TX_ERR_REM_EPID, val); + `ASSERT_ERROR(val == 0, "REG_TX_ERR_REM_EPID not initially 0"); + expected = $urandom() & 32'h0000FFFF; + write_radio(radio_num, 
REG_TX_ERR_REM_EPID, expected); + read_radio(radio_num, REG_TX_ERR_REM_EPID, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERR_REM_EPID didn't update correctly"); + + // REG_TX_ERR_ADDR (read/write) + read_radio(radio_num, REG_TX_ERR_ADDR, val); + `ASSERT_ERROR(val == 0, "REG_TX_ERR_ADDR not initially 0"); + expected = $urandom() & 32'h000FFFFF; + write_radio(radio_num, REG_TX_ERR_ADDR, expected); + read_radio(radio_num, REG_TX_ERR_ADDR, val); + `ASSERT_ERROR(val == expected, "REG_TX_ERR_ADDR didn't update correctly"); + + test.end_test(); + endtask : test_tx_registers + + + + task automatic test_rx(int radio_num); + + //--------------------- + // Finite Acquisitions + //--------------------- + + test.start_test("Rx (finite)", 50us); + + // Set packet length + write_radio(radio_num, REG_RX_MAX_WORDS_PER_PKT, WPP); + + // Grab and verify a partial packet + start_rx(radio_num, WPP/2); + check_rx(radio_num, WPP/2); + + // Grab a minimally-sized packet + start_rx(radio_num, 1); + check_rx(radio_num, 1); + + // Grab and verify several packets + start_rx(radio_num, WPP*15/2); + check_rx(radio_num, WPP*15/2); + + // Wait long enough to receive another packet and then make sure we didn't + // receive anything. That is, make sure Rx actually stopped. 
+ #MAX_PKT_WAIT; + `ASSERT_ERROR( + blk_ctrl.num_received(radio_num) == 0, + "Received more packets than expected" + ); + + test.end_test(); + + + //------------------------- + // Continuous Acquisitions + //------------------------- + + test.start_test("Rx (continuous)", 100us); + + start_rx(radio_num); + + // Grab and verify several packets + check_rx(radio_num, WPP*7); + stop_rx(radio_num); + + // Grab and discard any remaining packets + do begin + while (blk_ctrl.num_received(radio_num) != 0) begin + ChdrPacket #(CHDR_W) chdr_packet; + blk_ctrl.get_chdr(radio_num, chdr_packet); + end + #MAX_PKT_WAIT; + end while (blk_ctrl.num_received(radio_num) != 0); + + test.end_test(); + + + //-------------------------- + // Finite Timed Acquisition + //-------------------------- + + begin + ChdrPacket #(CHDR_W) chdr_packet; + chdr_word_t expected_time; + + test.start_test("Rx (finite, timed)", 100us); + + // Send Rx command with time in the future + expected_time = radio_time + 2000; + start_rx_timed(radio_num, WPP, expected_time); + + // Take a peek at the timestamp in the received packet to check it + blk_ctrl.peek_chdr(radio_num, chdr_packet); + `ASSERT_ERROR( + chdr_packet.timestamp == expected_time, + "Received packet didn't have expected timestamp" + ); + + // Verify the packet data + check_rx(radio_num, WPP); + test.end_test(); + end + + + //------------------------------ + // Continuous Timed Acquisition + //------------------------------ + + begin + ChdrPacket #(CHDR_W) chdr_packet; + chdr_word_t expected_time; + + test.start_test("Rx (continuous, timed)", 100us); + + // Send Rx command with time in the future + expected_time = radio_time + 2000; + start_rx_timed(radio_num, 0, expected_time); + + // Take a peek at the timestamp in the received packet to check it + blk_ctrl.peek_chdr(radio_num, chdr_packet); + `ASSERT_ERROR( + chdr_packet.timestamp == expected_time, + "Received packet didn't have expected timestamp" + ); + + // Verify a few packets + 
check_rx(radio_num, WPP*3); + stop_rx(radio_num); + + // Grab and discard any remaining packets + do begin + while (blk_ctrl.num_received(radio_num) != 0) begin + ChdrPacket #(CHDR_W) chdr_packet; + blk_ctrl.get_chdr(radio_num, chdr_packet); + end + #(MAX_PKT_WAIT); + end while (blk_ctrl.num_received(radio_num) != 0); + + test.end_test(); + end + + + //------------- + // Rx Overflow + //------------- + begin + logic [31:0] val; + + test.start_test("Rx (now, overflow)", 200us); + + // Configure the error reporting registers + write_radio(radio_num, REG_RX_ERR_PORT, TX_ERR_DST_PORT); + write_radio(radio_num, REG_RX_ERR_REM_PORT, TX_ERR_REM_DST_PORT); + write_radio(radio_num, REG_RX_ERR_REM_EPID, TX_ERR_REM_DST_EPID); + write_radio(radio_num, REG_RX_ERR_ADDR, TX_ERR_ADDRESS); + + // Stall the BFM to force a backup of data + blk_ctrl.set_slave_stall_prob(radio_num, 100); + + // Acquire continuously until we get an error + start_rx(radio_num); + + // Check that we're acquiring + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR( + val[CMD_FIFO_SPACE_POS +: CMD_FIFO_SPACE_LEN] != CMD_FIFO_SPACE_MAX, + "Rx radio reports that it is not busy" + ); + + // Verify that we receive an error + check_error(ERR_RX_OVERRUN); + + // Restore the BFM stall probability + blk_ctrl.set_slave_stall_prob(radio_num, STALL_PROB); + + // Verify that Rx stopped + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR( + val[CMD_FIFO_SPACE_POS +: CMD_FIFO_SPACE_LEN] == CMD_FIFO_SPACE_MAX, + "Rx radio reports that it is still busy after overflow" + ); + + // Discard any packets we received. Rx should eventually stop + // automatically after an overflow. 
+ do begin + while (blk_ctrl.num_received(radio_num) != 0) begin + ChdrPacket #(CHDR_W) chdr_packet; + blk_ctrl.get_chdr(radio_num, chdr_packet); + end + #(MAX_PKT_WAIT); + end while (blk_ctrl.num_received(radio_num) != 0); + + test.end_test(); + end + + + //-------------- + // Late Command + //-------------- + + test.start_test("Rx (timed, late)", 100us); + + start_rx_timed(radio_num, WPP, radio_time); + check_error(ERR_RX_LATE_CMD); + + // Late command should be ignored. Make sure we didn't receive any packets. + begin + ChdrPacket #(CHDR_W) chdr_packet; + #(MAX_PKT_WAIT); + `ASSERT_ERROR( + blk_ctrl.num_received(radio_num) == 0, + "Packets received for late Rx command" + ); + + // Discard any remaining packets + while (blk_ctrl.num_received(radio_num)) blk_ctrl.get_chdr(radio_num, chdr_packet); + end + + test.end_test(); + + + //--------------- + // Command Queue + //--------------- + + test.start_test("Rx (queue multiple commands)"); + + begin + logic [31:0] expected, val; + + // Send one continuous command and verify the queue fullness + start_rx(radio_num); + expected = CMD_FIFO_SPACE_MAX-1; + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR( + val[CMD_FIFO_SPACE_POS+:CMD_FIFO_SPACE_LEN] == expected, + "CMD_FIFO_SPACE did not decrement" + ); + + // Fill the command FIFO, going one over + for (int i = 0; i < CMD_FIFO_SPACE_MAX; i++) begin + start_rx(radio_num, WPP); + end + expected = 0; + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR( + val[CMD_FIFO_SPACE_POS+:CMD_FIFO_SPACE_LEN] == expected, + "CMD_FIFO_SPACE did not reach 0" + ); + + // Issue stop command and verify that the FIFO empties + stop_rx(radio_num); + expected = CMD_FIFO_SPACE_MAX; + read_radio(radio_num, REG_RX_STATUS, val); + `ASSERT_ERROR( + val[CMD_FIFO_SPACE_POS+:CMD_FIFO_SPACE_LEN] == expected, + "CMD_FIFO_SPACE did not return to max" + ); + + // Grab and discard any remaining packets + do begin + while (blk_ctrl.num_received(radio_num) != 0) begin + ChdrPacket 
#(CHDR_W) chdr_packet; + blk_ctrl.get_chdr(radio_num, chdr_packet); + end + #MAX_PKT_WAIT; + end while (blk_ctrl.num_received(radio_num) != 0); + + // Queue several long commands back-to-back and make sure they all + // complete. The lengths are unique to ensure we execute the right + // commands in the expected order. + for (int i = 0; i < 3; i++) start_rx(radio_num, WPP*20+i); + for (int i = 0; i < 3; i++) check_rx(radio_num, WPP*20+i); + + // Make sure we don't get any more data + do begin + while (blk_ctrl.num_received(radio_num) != 0) begin + `ASSERT_ERROR(0, "Received unexpected packets"); + end + #MAX_PKT_WAIT; + end while (blk_ctrl.num_received(radio_num) != 0); + end + + test.end_test(); + + endtask : test_rx + + + + task automatic test_tx(int radio_num); + logic [RADIO_W-1:0] radio_data; + enum { WAIT_FOR_EOP, WAIT_FOR_EOB } policy; + + //------- + // Setup + //------- + + test.start_test("Tx Init", 50us); + + // Configure the error reporting registers + write_radio(radio_num, REG_TX_ERR_PORT, TX_ERR_DST_PORT); + write_radio(radio_num, REG_TX_ERR_REM_PORT, TX_ERR_REM_DST_PORT); + write_radio(radio_num, REG_TX_ERR_REM_EPID, TX_ERR_REM_DST_EPID); + write_radio(radio_num, REG_TX_ERR_ADDR, TX_ERR_ADDRESS); + + test.end_test(); + + + //--------------- + // Test Tx (now) + //--------------- + + test.start_test("Tx (now)", 50us); + + // Grab and verify a partial packet + start_tx(radio_num, WPP*3/4); + check_tx(radio_num, WPP*3/4); + check_error(ERR_TX_EOB_ACK); + + // Grab and verify multiple packets + start_tx(radio_num, WPP*3/2); + check_tx(radio_num, WPP*3/2); + check_error(ERR_TX_EOB_ACK); + + // Test a minimally-sized packet + start_tx(radio_num, 1); + check_tx(radio_num, 1); + check_error(ERR_TX_EOB_ACK); + + test.end_test(); + + + //--------------------- + // Test Tx (underflow) + //--------------------- + + test.start_test("Tx (now, underflow)", 50us); + + // Send some bursts without EOB + start_tx(radio_num, WPP*3/4, 1, 0); // Skip EOB + 
check_tx(radio_num, WPP*3/4); + check_error(ERR_TX_UNDERRUN); + + start_tx(radio_num, WPP*2, 1, 0); // Skip EOB + check_tx(radio_num, WPP*2); + check_error(ERR_TX_UNDERRUN); + + test.end_test(); + + + //----------------- + // Test Tx (timed) + //----------------- + + test.start_test("Tx (timed)", 50us); + + // Grab and verify a partial packet + start_tx_timed(radio_num, WPP*3/4, radio_time + 200); + check_tx_timed(radio_num, WPP*3/4, radio_time + 200); + check_error(ERR_TX_EOB_ACK); + + // Grab and verify whole packets + start_tx_timed(radio_num, WPP*2, radio_time + 200); + check_tx_timed(radio_num, WPP*2, radio_time + 200); + check_error(ERR_TX_EOB_ACK); + + test.end_test(); + + + //----------------- + // Test Tx (timed, underflow) + //----------------- + + test.start_test("Tx (timed, underflow)", 50us); + + // Send some bursts without EOB + start_tx_timed(radio_num, WPP*3/4, radio_time + 200, 1, 0); + check_tx_timed(radio_num, WPP*3/4, radio_time + 200); + check_error(ERR_TX_UNDERRUN); + + start_tx_timed(radio_num, WPP*2, radio_time + 200, 1, 0); + check_tx_timed(radio_num, WPP*2, radio_time + 200); + check_error(ERR_TX_UNDERRUN); + + test.end_test(); + + + //--------------------------- + // Test Tx (timed, late) + //--------------------------- + + test.start_test("Tx (timed, late)", 50us); + + // Test each error policy + policy = policy.first(); + do begin + // Set the policy + if (policy == WAIT_FOR_EOP) begin + write_radio(radio_num, REG_TX_ERROR_POLICY, TX_ERR_POLICY_PACKET); + end else if (policy == WAIT_FOR_EOB) begin + write_radio(radio_num, REG_TX_ERROR_POLICY, TX_ERR_POLICY_BURST); + end + +// Commenting out the fork code for now due to Vivado 2018.3 bug. 
+// radio_data = radio_tx_data[radio_num]; +// fork : tx_fork + // In this branch of the fork, we send the packets + repeat (2) begin + // Send late packets with random start value + start_tx_timed(radio_num, WPP*3, 0, $urandom()); + + if (policy == WAIT_FOR_EOP) begin + // We should get three errors, one for each packet + repeat (3) check_error(ERR_TX_LATE_DATA); + end else if (policy == WAIT_FOR_EOB) begin + // We should get one error for the entire burst + check_error(ERR_TX_LATE_DATA); + end + end + +// // The packets sent in the above branch of the fork should be +// // dropped. In this branch of the fork we make sure that the Tx +// // output doesn't change. +// begin +// forever begin +// @(posedge radio_clk) +// `ASSERT_ERROR( +// radio_data === radio_tx_data[radio_num], +// "Radio Tx output changed when late Tx packet should have been ignored" +// ); +// end +// end +// join_any +// +// // Stop checking the output +// disable tx_fork; + + policy = policy.next(); + end while (policy != policy.first()); + + // Make sure good transmissions can go through now. + start_tx_timed(radio_num, WPP, radio_time + 200); + check_tx_timed(radio_num, WPP, radio_time + 200); + check_error(ERR_TX_EOB_ACK); + + test.end_test(); + + endtask : test_tx + + + + // Test internal loopback and idle value + task automatic test_loopback_and_idle(int radio_num); + int byte_length; + chdr_word_t data[$]; + bit [ITEM_W-1:0] idle; + + //---------------------------- + // Use IDLE value to loopback + //---------------------------- + + test.start_test("Idle Loopback", 50us); + + // Turn on loopback + write_radio(radio_num, REG_LOOPBACK_EN, 1); + + // This test ensures we get the Tx output on Rx and not the TB's simulated + // radio data. It also tests updating the idle value. Run the test twice to + // make sure the IDLE value updates. 
+ repeat (2) begin + // Set idle value + idle = $urandom(); + write_radio(radio_num, REG_TX_IDLE_VALUE, idle); + + // Grab a radio word and check that it equals the IDLE value + write_radio_64(radio_num, REG_RX_CMD_NUM_WORDS_LO, 1); + write_radio(radio_num, REG_RX_CMD, RX_CMD_FINITE); + blk_ctrl.recv(radio_num, data, byte_length); + + // Check the length + `ASSERT_ERROR(byte_length == RADIO_W/8, "Didn't receive expected length"); + + // Check the payload + foreach (data[i]) begin + chdr_word_t word; + word = data[i]; // Work around Vivado 2018.3 issue + `ASSERT_ERROR( + word == {$bits(chdr_word_t)/ITEM_W{idle}}, + "Loopback data didn't match expected" + ); + end + end + + test.end_test(); + + + //--------------------- + // Loopback Tx packets + //--------------------- + + test.start_test("Tx Loopback", 50us); + + // This test ensures that loopback isn't reordering words or anything else + // unexpected. + + // Configure the Tx error reporting registers + write_radio(radio_num, REG_TX_ERR_PORT, TX_ERR_DST_PORT); + write_radio(radio_num, REG_TX_ERR_REM_PORT, TX_ERR_REM_DST_PORT); + write_radio(radio_num, REG_TX_ERR_REM_EPID, TX_ERR_REM_DST_EPID); + write_radio(radio_num, REG_TX_ERR_ADDR, TX_ERR_ADDRESS); + + // Set packet length + write_radio(radio_num, REG_RX_MAX_WORDS_PER_PKT, WPP); + + // Loopback a few packets, back-to-back. This code has a race condition + // since there's a delay between when we start Tx and when Rx starts, due + // to how long it takes to write the Rx registers. Therefore, we transmit a + // lot more packets than we receive to ensure we're still transmitting by + // the time we receive. 
+ start_tx(radio_num, WPP*16); + start_rx(radio_num, WPP*2); + + // Check the results + check_rx(radio_num, WPP*2); + check_error(ERR_TX_EOB_ACK); + + // Turn off loopback + write_radio(radio_num, REG_LOOPBACK_EN, 0); + + test.end_test(); + endtask : test_loopback_and_idle; + + + + //--------------------------------------------------------------------------- + // Test Process + //--------------------------------------------------------------------------- + + timeout_t timeout; + + initial begin : main + string tb_name; + + //------------------------------------------------------------------------- + // Initialization + //------------------------------------------------------------------------- + + // Generate a string for the name of this instance of the testbench + tb_name = $sformatf( + "rfnoc_block_radio_tb\nCHDR_W = %0D, ITEM_W = %0D, NIPC = %0D, NUM_PORTS = %0D, STALL_PROB = %0D, STB_PROB = %0D, TEST_REGS = %0D", + CHDR_W, ITEM_W, NIPC, NUM_PORTS, STALL_PROB, STB_PROB, TEST_REGS + ); + + test.start_tb(tb_name); + + // Don't start the clocks until after start_tb() returns. This ensures that + // the clocks aren't toggling while other instances of this testbench are + // running, which speeds up simulation time. + rfnoc_chdr_clk_gen.start(); + rfnoc_ctrl_clk_gen.start(); + radio_clk_gen.start(); + + // Setup and start the stream endpoint BFM + blk_ctrl = new(backend, m_ctrl, s_ctrl); + for (int i = 0; i < NUM_PORTS; i++) begin + // I'd love to do this: + // void'(blk_ctrl.add_master_data_port(m_chdr[i])); + // void'(blk_ctrl.add_slave_data_port(s_chdr[i])); + // But interface indices must be constant. So instead, we use a semaphore + // to trigger port initialization and control the order of initialization + // in the generate block gen_radio_connections. 
+ + // Put the port number in the semaphore to cause its initializer to run + port_sem.put(i+1); + // Delay to allow gen_radio_connections to run + #0; + // Get the port number again to know when it's done + port_sem.get(i+1); + + // Set the CHDR BFM stall probability + blk_ctrl.set_master_stall_prob(i, STALL_PROB); + blk_ctrl.set_slave_stall_prob(i, STALL_PROB); + end + blk_ctrl.run(); + + + //------------------------------------------------------------------------- + // Reset + //------------------------------------------------------------------------- + + test.start_test("Flush block then reset it", 10us); + blk_ctrl.flush_and_reset(); + test.end_test(); + + + //------------------------------------------------------------------------- + // Test Sequences + //------------------------------------------------------------------------- + + // Run register tests first, since they check that initial values are + // correct. + + test_block_info(); + if (TEST_REGS) test_shared_registers(); + + for (int radio_num = 0; radio_num < NUM_PORTS; radio_num++) begin + $display("************************************************************"); + $display("Testing Radio Channel %0d", radio_num); + $display("************************************************************"); + if (TEST_REGS) begin + test_general_registers(radio_num); + test_rx_registers(radio_num); + test_tx_registers(radio_num); + end + test_rx(radio_num); + test_tx(radio_num); + test_loopback_and_idle(radio_num); + end + + + //------------------------------------------------------------------------- + // Finish + //------------------------------------------------------------------------- + + // End the TB, but don't $finish, since we don't want to kill other + // instances of this testbench that may be running. 
+ test.end_tb(0); + + // Kill the clocks to end this instance of the testbench + rfnoc_chdr_clk_gen.kill(); + rfnoc_ctrl_clk_gen.kill(); + radio_clk_gen.kill(); + + end : main + +endmodule : rfnoc_block_radio_tb diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rx_frontend_gen3.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rx_frontend_gen3.v new file mode 100644 index 000000000..54529136b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/rx_frontend_gen3.v @@ -0,0 +1,246 @@ +// +// Copyright 2015 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module rx_frontend_gen3 #( + parameter SR_MAG_CORRECTION = 0, + parameter SR_PHASE_CORRECTION = 1, + parameter SR_OFFSET_I = 2, + parameter SR_OFFSET_Q = 3, + parameter SR_IQ_MAPPING = 4, + parameter SR_HET_PHASE_INCR = 5, + parameter BYPASS_DC_OFFSET_CORR = 0, + parameter BYPASS_IQ_COMP = 0, + parameter BYPASS_REALMODE_DSP = 0, + parameter DEVICE = "7SERIES" +)( + input clk, input reset, input sync_in, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input adc_stb, input [15:0] adc_i, input [15:0] adc_q, + output rx_stb, output [15:0] rx_i, output [15:0] rx_q +); + + wire realmode; + wire swap_iq; + wire invert_i; + wire invert_q; + wire realmode_decim; + wire bypass_all; + wire [1:0] iq_map_reserved; + wire [17:0] mag_corr, phase_corr; + wire phase_dir; + wire phase_sync; + + reg [23:0] adc_i_mux, adc_q_mux; + reg adc_mux_stb; + wire [23:0] adc_i_ofs, adc_q_ofs, adc_i_comp, adc_q_comp; + reg [23:0] adc_i_ofs_dly, adc_q_ofs_dly; + wire adc_ofs_stb, adc_comp_stb; + reg [1:0] adc_ofs_stb_dly; + wire [23:0] adc_i_dsp, adc_q_dsp; + wire adc_dsp_stb; + wire [35:0] corr_i, corr_q; + wire [15:0] rx_i_out, rx_q_out; + + /******************************************************** + ** Settings Bus Registers + ********************************************************/ + setting_reg 
#(.my_addr(SR_MAG_CORRECTION),.width(18)) sr_mag_corr ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(mag_corr),.changed()); + + setting_reg #(.my_addr(SR_PHASE_CORRECTION),.width(18)) sr_phase_corr ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_corr),.changed()); + + setting_reg #(.my_addr(SR_IQ_MAPPING), .width(8)) sr_mux_sel ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({bypass_all,iq_map_reserved,realmode_decim,invert_i,invert_q,realmode,swap_iq}),.changed()); + + // Setting reg: 1 bit to set phase direction: default to 0: + // direction bit == 0: the phase is increased by pi/2 (counter clockwise) + // direction bit == 1: the phase is increased by -pi/2 (clockwise) + setting_reg #(.my_addr(SR_HET_PHASE_INCR), .width(1)) sr_phase_dir ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_dir),.changed(phase_sync)); + + /******************************************************** + ** IQ Mapping (swapping, inversion, real-mode) + ********************************************************/ + // MUX so we can do realmode signals on either input + always @(posedge clk) begin + if (swap_iq) begin + adc_i_mux[23:8] <= invert_q ? ~adc_q : adc_q; + adc_q_mux[23:8] <= realmode ? 16'd0 : invert_i ? ~adc_i : adc_i; + end else begin + adc_i_mux[23:8] <= invert_i ? ~adc_i : adc_i; + adc_q_mux[23:8] <= realmode ? 16'd0 : invert_q ? 
~adc_q : adc_q; + end + adc_mux_stb <= adc_stb; + adc_i_mux[7:0] <= 8'd0; + adc_q_mux[7:0] <= 8'd0; + end + + /******************************************************** + ** DC offset Correction + ********************************************************/ + generate + if (BYPASS_DC_OFFSET_CORR == 0) begin + + rx_dcoffset #(.WIDTH(24),.ADDR(SR_OFFSET_I)) rx_dcoffset_i ( + .clk(clk),.rst(reset),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in_stb(adc_mux_stb),.in(adc_i_mux), + .out_stb(adc_ofs_stb),.out(adc_i_ofs)); + rx_dcoffset #(.WIDTH(24),.ADDR(SR_OFFSET_Q)) rx_dcoffset_q ( + .clk(clk),.rst(reset),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in_stb(adc_mux_stb),.in(adc_q_mux), + .out_stb(),.out(adc_q_ofs)); + + end else begin + assign adc_ofs_stb = adc_mux_stb; + assign adc_i_ofs = adc_i_mux; + assign adc_q_ofs = adc_q_mux; + end + endgenerate + + /******************************************************** + ** IQ Imbalance Compensation + ********************************************************/ + generate + if (BYPASS_IQ_COMP == 0) begin + + mult_add_clip #( + .WIDTH_A(18), + .BIN_PT_A(17), + .WIDTH_B(18), + .BIN_PT_B(17), + .WIDTH_C(24), + .BIN_PT_C(23), + .WIDTH_O(24), + .BIN_PT_O(23), + .LATENCY(2) + ) mult_i ( + .clk(clk), + .reset(reset), + .CE(1'b1), + .A(adc_i_ofs[23:6]), + .B(mag_corr), + .C(adc_i_ofs), + .O(adc_i_comp) + ); + + mult_add_clip #( + .WIDTH_A(18), + .BIN_PT_A(17), + .WIDTH_B(18), + .BIN_PT_B(17), + .WIDTH_C(24), + .BIN_PT_C(23), + .WIDTH_O(24), + .BIN_PT_O(23), + .LATENCY(2) + ) mult_q ( + .clk(clk), + .reset(reset), + .CE(1'b1), + .A(adc_i_ofs[23:6]), + .B(phase_corr), + .C(adc_q_ofs), + .O(adc_q_comp) + ); + + // Delay to match path latencies + always @(posedge clk) begin + if (reset) begin + adc_ofs_stb_dly <= 2'b0; + end else begin + adc_ofs_stb_dly <= {adc_ofs_stb_dly[0], adc_ofs_stb}; + end + end + + assign adc_comp_stb = adc_ofs_stb_dly[1]; + + end else begin + assign adc_comp_stb = adc_ofs_stb; + assign 
adc_i_comp = adc_i_ofs; + assign adc_q_comp = adc_q_ofs; + end + endgenerate + + /******************************************************** + ** Realmode DSP: + * - Heterodyne frequency translation + * - Realmode decimation (by 2) + ********************************************************/ + generate + if (BYPASS_REALMODE_DSP == 0) begin + + // Mixer output lanes are 24 bits each: both instances below are configured with WIDTH=24 and exchange {I,Q} as one concatenated bus. + // NOTE(review): assumes the o_tdata/i_tdata ports are 2*WIDTH = 48 bits wide, as the other 48-bit connections on these instances suggest - confirm against the module definitions. + wire [23:0] adc_i_dsp_cout, adc_q_dsp_cout; + wire [23:0] adc_i_cclip, adc_q_cclip; + wire [23:0] adc_i_hb, adc_q_hb; + wire [23:0] adc_i_dec, adc_q_dec; + wire adc_dsp_cout_stb; + wire adc_cclip_stb; + wire adc_hb_stb; + + wire valid_hbf0; + wire valid_hbf1; + wire valid_dec0; + wire valid_dec1; + + // 90 degree mixer + quarter_rate_downconverter #(.WIDTH(24)) qr_dc_i( + .clk(clk), .reset(reset || sync_in), .phase_sync(phase_sync), + .i_tdata({adc_i_comp, adc_q_comp}), .i_tlast(1'b1), .i_tvalid(adc_comp_stb), .i_tready(), + .o_tdata({adc_i_dsp_cout, adc_q_dsp_cout}), .o_tlast(), .o_tvalid(adc_dsp_cout_stb), .o_tready(1'b1), + .dirctn(phase_dir)); + + // Double FIR and decimator block + localparam HB_COEFS = {-18'd62, 18'd0, 18'd194, 18'd0, -18'd440, 18'd0, 18'd855, 18'd0, -18'd1505, 18'd0, 18'd2478, 18'd0, + -18'd3900, 18'd0, 18'd5990, 18'd0, -18'd9187, 18'd0, 18'd14632, 18'd0, -18'd26536, 18'd0, 18'd83009, 18'd131071, 18'd83009, + 18'd0, -18'd26536, 18'd0, 18'd14632, 18'd0, -18'd9187, 18'd0, 18'd5990, 18'd0, -18'd3900, 18'd0, 18'd2478, 18'd0, -18'd1505, + 18'd0, 18'd855, 18'd0, -18'd440, 18'd0, 18'd194, 18'd0, -18'd62}; + + axi_fir_filter_dec #( + .WIDTH(24), + .COEFF_WIDTH(18), + .NUM_COEFFS(47), + .COEFFS_VEC(HB_COEFS), + .BLANK_OUTPUT(0) + ) ffd0 ( + .clk(clk), .reset(reset || sync_in), + + .i_tdata({adc_i_dsp_cout, adc_q_dsp_cout}), + .i_tlast(1'b1), + .i_tvalid(adc_dsp_cout_stb), + .i_tready(), + + .o_tdata({adc_i_dec, adc_q_dec}), + .o_tlast(), + .o_tvalid(adc_hb_stb), + .o_tready(1'b1)); + + assign adc_dsp_stb = realmode_decim ? adc_hb_stb : adc_comp_stb; + assign adc_i_dsp = realmode_decim ? 
adc_i_dec : adc_i_comp; + assign adc_q_dsp = realmode_decim ? adc_q_dec : adc_q_comp; + + end else begin + assign adc_dsp_stb = adc_comp_stb; + assign adc_i_dsp = adc_i_comp; + assign adc_q_dsp = adc_q_comp; + end + endgenerate + + // Round to short complex (sc16) + round_sd #(.WIDTH_IN(24),.WIDTH_OUT(16)) round_i ( + .clk(clk),.reset(reset), .in(adc_i_dsp),.strobe_in(adc_dsp_stb), .out(rx_i_out), .strobe_out(rx_stb)); + round_sd #(.WIDTH_IN(24),.WIDTH_OUT(16)) round_q ( + .clk(clk),.reset(reset), .in(adc_q_dsp),.strobe_in(adc_dsp_stb), .out(rx_q_out), .strobe_out()); + + assign rx_i = bypass_all ? adc_i : rx_i_out; + assign rx_q = bypass_all ? adc_q : rx_q_out; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/sim_radio_gen.sv b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/sim_radio_gen.sv new file mode 100644 index 000000000..a6f827f8f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/sim_radio_gen.sv @@ -0,0 +1,104 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: sim_radio_gen +// +// Description: Generate radio data for simulation purposes. The strobe pattern +// is random, which is not like a normal radio but covers every possibility. +// The data pattern is an incrementing sequence of samples, with each channel +// starting at a different value to differentiate them. Strobe and time are +// common between channels. 
+// + +module sim_radio_gen #( + parameter int NSPC = 1, // Number of samples per clock cycle + parameter int SAMP_W = 32, // Length of each radio sample + parameter int NUM_CHANNELS = 1, // Number of radio RX ports + parameter int STB_PROB = 50, // Probability (in percent, 0 to 100) of STB being asserted on each clock cycle + parameter int INCREMENT = 2, // Amount by which radio_time increments on each strobe + parameter int PPS_PERIOD = 50 // Period of the PPS output, in radio_time counts +) ( + input bit radio_clk, + input bit radio_rst, + output bit [NUM_CHANNELS*SAMP_W*NSPC-1:0] radio_rx_data, + output bit [ NUM_CHANNELS-1:0] radio_rx_stb, + output bit [ 63:0] radio_time, + output bit radio_pps +); + + localparam int RADIO_W = SAMP_W*NSPC; + typedef bit [RADIO_W-1:0] radio_t; // Radio output word + typedef bit [SAMP_W-1:0] sample_t; // Single sample + + initial assert (PPS_PERIOD % INCREMENT == 0) else + $fatal(1, "PPS_PERIOD must be a multiple of INCREMENT"); + + + // Generate an initial value for all radio channels + function radio_t [NUM_CHANNELS-1:0] radio_init(); + radio_t [NUM_CHANNELS-1:0] ret_val; + + for (int n = 0; n < NUM_CHANNELS; n++) begin + sample_t sample; + + // Calculate the value of first sample in this radio channel + sample = sample_t'((2.0 ** SAMP_W) / NUM_CHANNELS * n); + + // Calculate the value of subsequent samples in the channel + for (int s = 0; s < NSPC; s++) begin + ret_val[n][s*SAMP_W +: SAMP_W] = sample + s; + end + end + + return ret_val; + endfunction : radio_init + + + //--------------------------------------------------------------------------- + // Radio Data Generation + //--------------------------------------------------------------------------- + + radio_t [NUM_CHANNELS-1:0] data = radio_init(); + + assign radio_rx_data = data; + + always @(posedge radio_clk) begin : radio_data_count_reg + if (radio_rst) begin + data <= radio_init(); + radio_rx_stb <= '0; + end else begin + radio_rx_stb <= '0; + // $urandom_range(99) is uniform over 0..99 inclusive, so the strobe fires with probability STB_PROB/100 and STB_PROB = 100 strobes on every cycle + if ($urandom_range(99) < STB_PROB) begin + for (int n = 0; n < NUM_CHANNELS; n++) begin + for 
(int s = 0; s < NSPC; s++) begin + data[n][s*SAMP_W +: SAMP_W] <= data[n][s*SAMP_W +: SAMP_W] + NSPC; + end + end + radio_rx_stb <= '1; + end + end + end : radio_data_count_reg + + + //--------------------------------------------------------------------------- + // Radio Time + //--------------------------------------------------------------------------- + + always @(posedge radio_clk) begin + if (radio_rst) begin + radio_time <= 64'b0; + radio_pps <= 1'b0; + end else begin + radio_pps <= 1'b0; + if (radio_rx_stb[0]) begin + radio_time <= radio_time + INCREMENT; + if (radio_time % PPS_PERIOD == 0 && radio_time != 0) begin + radio_pps <= 1'b1; + end + end + end + end + +endmodule : sim_radio_gen
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/tx_frontend_gen3.v b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/tx_frontend_gen3.v new file mode 100644 index 000000000..f5435787d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/blocks/rfnoc_block_radio/tx_frontend_gen3.v @@ -0,0 +1,173 @@ +// +// Copyright 2015 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module tx_frontend_gen3 #( + parameter SR_OFFSET_I = 0, + parameter SR_OFFSET_Q = 1, + parameter SR_MAG_CORRECTION = 2, + parameter SR_PHASE_CORRECTION = 3, + parameter SR_MUX = 4, + parameter BYPASS_DC_OFFSET_CORR = 0, + parameter BYPASS_IQ_COMP = 0, + parameter DEVICE = "7SERIES" +)( + input clk, input reset, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input tx_stb, input [15:0] tx_i, input [15:0] tx_q, + output reg dac_stb, output reg [15:0] dac_i, output reg [15:0] dac_q +); + + wire [23:0] i_dco, q_dco; + wire [7:0] mux_ctrl; + wire [17:0] mag_corr, phase_corr; + + wire [35:0] corr_i, corr_q; + reg [1:0] tx_stb_dly; + reg [23:0] tx_i_dly, tx_q_dly; + wire tx_comp_stb, tx_ofs_stb; + wire [23:0] tx_i_comp, tx_q_comp, tx_i_ofs, tx_q_ofs; + wire tx_round_stb; + wire [15:0] tx_i_round, tx_q_round; + + /******************************************************** + ** Settings Registers + ********************************************************/ + setting_reg #(.my_addr(SR_OFFSET_I), .width(24)) sr_i_dc_offset ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(i_dco),.changed()); + + setting_reg #(.my_addr(SR_OFFSET_Q), .width(24)) sr_q_dc_offset ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(q_dco),.changed()); + + setting_reg #(.my_addr(SR_MAG_CORRECTION),.width(18)) sr_mag_corr ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(mag_corr),.changed()); + + setting_reg 
#(.my_addr(SR_PHASE_CORRECTION),.width(18)) sr_phase_corr ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_corr),.changed()); + + setting_reg #(.my_addr(SR_MUX), .width(8), .at_reset(8'h10)) sr_mux_ctrl ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(mux_ctrl),.changed()); + + /******************************************************** + ** DSP + ********************************************************/ + // I/Q compensation with option to bypass + generate + if (BYPASS_IQ_COMP == 0) begin + + mult_add_clip #( + .WIDTH_A(16), + .BIN_PT_A(15), + .WIDTH_B(18), + .BIN_PT_B(17), + .WIDTH_C(16), + .BIN_PT_C(15), + .WIDTH_O(24), + .BIN_PT_O(23), + .LATENCY(2) + ) mult_i ( + .clk(clk), + .reset(reset), + .CE(1'b1), + .A(tx_i), + .B(mag_corr), + .C(tx_i), + .O(tx_i_comp) + ); + + mult_add_clip #( + .WIDTH_A(16), + .BIN_PT_A(15), + .WIDTH_B(18), + .BIN_PT_B(17), + .WIDTH_C(16), + .BIN_PT_C(15), + .WIDTH_O(24), + .BIN_PT_O(23), + .LATENCY(2) + ) mult_q ( + .clk(clk), + .reset(reset), + .CE(1'b1), + .A(tx_i), + .B(phase_corr), + .C(tx_q), + .O(tx_q_comp) + ); + + // Delay to match path latencies + always @(posedge clk) begin + if (reset) begin + tx_stb_dly <= 2'b0; + end else begin + tx_stb_dly <= {tx_stb_dly[0], tx_stb}; + end + end + + assign tx_comp_stb = tx_stb_dly[1]; + + end else begin + assign tx_comp_stb = tx_stb; + assign tx_i_comp = {tx_i,8'd0}; + assign tx_q_comp = {tx_q,8'd0}; + end + endgenerate + + // DC offset correction + generate + if (BYPASS_DC_OFFSET_CORR == 0) begin + add2_and_clip_reg #(.WIDTH(24)) add_dco_i ( + .clk(clk), .rst(reset), .in1(i_dco), .in2(tx_i_comp), .strobe_in(tx_comp_stb), .sum(tx_i_ofs), .strobe_out(tx_ofs_stb)); + add2_and_clip_reg #(.WIDTH(24)) add_dco_q ( + .clk(clk), .rst(reset), .in1(q_dco), .in2(tx_q_comp), .strobe_in(tx_comp_stb), .sum(tx_q_ofs), .strobe_out()); + end else begin + assign tx_ofs_stb = tx_comp_stb; + assign tx_i_ofs = tx_i_comp; + assign tx_q_ofs = 
tx_q_comp; + end + endgenerate + + // Round to short complex (sc16) + round_sd #(.WIDTH_IN(24),.WIDTH_OUT(16)) round_i ( + .clk(clk),.reset(reset), .in(tx_i_ofs),.strobe_in(tx_ofs_stb), .out(tx_i_round), .strobe_out(tx_round_stb)); + round_sd #(.WIDTH_IN(24),.WIDTH_OUT(16)) round_q ( + .clk(clk),.reset(reset), .in(tx_q_ofs),.strobe_in(tx_ofs_stb), .out(tx_q_round), .strobe_out()); + + // Mux + // Muxing logic matches that in tx_frontend.v, and what tx_frontend_core_200.cpp expects. + // + // mux_ctrl ! 0+0 ! 0+16 ! 1+0 ! 1+16 + // =========!======!======!======!======== + // DAC_I ! tx_i ! tx_i ! tx_q ! tx_q + // DAC_Q ! tx_i ! tx_q ! tx_i ! tx_q + // + // Most daughterboards will thus use 0x01 or 0x10 as the mux_ctrl value. + always @(posedge clk) begin + if (reset) begin + dac_stb <= 1'b0; + dac_i <= 16'd0; + dac_q <= 16'd0; + end else begin + dac_stb <= tx_round_stb; + case(mux_ctrl[3:0]) + 0 : dac_i <= tx_i_round; + 1 : dac_i <= tx_q_round; + default : dac_i <= 0; + endcase + case(mux_ctrl[7:4]) + 0 : dac_q <= tx_i_round; + 1 : dac_q <= tx_q_round; + default : dac_q <= 0; + endcase + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/cadd.v b/fpga/usrp3/lib/rfnoc/cadd.v new file mode 100644 index 000000000..d571b3440 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/cadd.v @@ -0,0 +1,32 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Complex adder + +module cadd + #(parameter WIDTH=16) + (input clk, input reset, + input [WIDTH*2-1:0] a_tdata, input a_tlast, input a_tvalid, output a_tready, + input [WIDTH*2-1:0] b_tdata, input b_tlast, input b_tvalid, output b_tready, + output [WIDTH*2-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + wire int_tlast = a_tlast | b_tlast; + wire int_tvalid, int_tready; + wire [WIDTH*2-1:0] int_tdata; + + assign int_tdata[WIDTH*2-1:WIDTH] = a_tdata[WIDTH*2-1:WIDTH] + b_tdata[WIDTH*2-1:WIDTH]; + 
assign int_tdata[WIDTH-1:0] = a_tdata[WIDTH-1:0] + b_tdata[WIDTH-1:0]; + + assign int_tvalid = a_tvalid & b_tvalid; + assign a_tready = int_tvalid & int_tready; + assign b_tready = a_tready; + + axi_fifo #(.WIDTH(WIDTH*2+1), .SIZE(0)) flop_output + (.clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({int_tlast, int_tdata}), .i_tvalid(int_tvalid), .i_tready(int_tready), + .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready)); + +endmodule // cadd diff --git a/fpga/usrp3/lib/rfnoc/chdr_deframer.v b/fpga/usrp3/lib/rfnoc/chdr_deframer.v new file mode 100644 index 000000000..408c26e9a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_deframer.v @@ -0,0 +1,108 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// FIXME -- detect seqnum errors? + +module chdr_deframer #( + parameter WIDTH = 32 // Can be 32 or 64 +)( input clk, input reset, input clear, + input [63:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] o_tdata, output [127:0] o_tuser, output o_tlast, output o_tvalid, input o_tready); + + localparam ST_HEAD = 2'd0; + localparam ST_TIME = 2'd1; + localparam ST_BODY = 2'd2; + + reg [1:0] chdr_state; + reg odd_length; + + wire [127:0] hdr_i_tuser, hdr_o_tuser; + wire hdr_i_tvalid, hdr_i_tready; + wire hdr_o_tvalid, hdr_o_tready; + + wire [63:0] body_i_tdata, body_o_tdata; + wire body_i_tlast, body_o_tlast; + wire body_i_tvalid, body_o_tvalid; + wire body_i_tready, body_o_tready; + + wire has_time = i_tdata[61]; + wire [15:0] len = i_tdata[47:32]; + reg [63:0] held_i_tdata; + + assign body_i_tdata = i_tdata; + assign body_i_tlast = i_tlast; + assign body_i_tvalid = (chdr_state == ST_BODY) ? i_tvalid : 1'b0; + + assign hdr_i_tuser = (chdr_state == ST_HEAD) ? { i_tdata, i_tdata } : { held_i_tdata, i_tdata }; // 2nd half ignored if no time + assign hdr_i_tvalid = (chdr_state == ST_TIME) ? 
i_tvalid : + ((chdr_state == ST_HEAD) & ~has_time) ? i_tvalid : + 1'b0; + + assign i_tready = (chdr_state == ST_BODY) ? body_i_tready : hdr_i_tready; + + // FIXME handle packets with no body + always @(posedge clk) + if(reset | clear) + chdr_state <= ST_HEAD; + else + case(chdr_state) + ST_HEAD : + if(i_tvalid & hdr_i_tready) + if(has_time) + begin + chdr_state <= ST_TIME; + held_i_tdata <= i_tdata; + end + else + chdr_state <= ST_BODY; + ST_TIME : + if(i_tvalid & hdr_i_tready) + chdr_state <= ST_BODY; + ST_BODY : + if(i_tvalid & body_i_tready & i_tlast) + chdr_state <= ST_HEAD; + endcase // case (chdr_state) + + axi_fifo #(.WIDTH(128), .SIZE(5)) hdr_fifo + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata(hdr_i_tuser), .i_tvalid(hdr_i_tvalid), .i_tready(hdr_i_tready), + .o_tdata(hdr_o_tuser), .o_tvalid(hdr_o_tvalid), .o_tready(hdr_o_tready), + .occupied(), .space()); + + axi_fifo #(.WIDTH(65), .SIZE(5)) body_fifo + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({body_i_tlast, body_i_tdata}), .i_tvalid(body_i_tvalid), .i_tready(body_i_tready), + .o_tdata({body_o_tlast, body_o_tdata}), .o_tvalid(body_o_tvalid), .o_tready(body_o_tready), + .occupied(), .space()); + + assign o_tuser = hdr_o_tuser; + assign o_tvalid = hdr_o_tvalid & body_o_tvalid; + assign hdr_o_tready = o_tvalid & o_tready & o_tlast; + + generate if (WIDTH == 32) begin + reg second_half; + wire odd_len = hdr_o_tuser[98] ^ |hdr_o_tuser[97:96]; + + always @(posedge clk) + if(reset | clear) + second_half <= 1'b0; + else + if(o_tvalid & o_tready) + if(o_tlast) + second_half <= 1'b0; + else + second_half <= ~second_half; + + assign o_tdata = second_half ? 
body_o_tdata[31:0] : body_o_tdata[63:32]; + assign o_tlast = body_o_tlast & (second_half | odd_len); + assign body_o_tready = o_tvalid & o_tready & (o_tlast | second_half); + end else if (WIDTH == 64) begin + assign o_tdata = body_o_tdata; + assign o_tlast = body_o_tlast; + assign body_o_tready = o_tvalid & o_tready; + end endgenerate + +endmodule // chdr_deframer diff --git a/fpga/usrp3/lib/rfnoc/chdr_deframer_2clk.v b/fpga/usrp3/lib/rfnoc/chdr_deframer_2clk.v new file mode 100644 index 000000000..e15263b09 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_deframer_2clk.v @@ -0,0 +1,139 @@ +///////////////////////////////////////////////////////////////////// +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_deframer_2clk +// Description: +// - Takes a CHDR packet stream in (pkt_clk domain), moves the header +// (and the timestamp, when present) onto tuser, and outputs the +// packet body as a sample stream (samp_clk domain) +// +///////////////////////////////////////////////////////////////////// + + +module chdr_deframer_2clk #( + parameter WIDTH = 32 // 32 and 64 bits supported +) ( + input samp_clk, input samp_rst, input pkt_clk, input pkt_rst, + input [63:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] o_tdata, output [127:0] o_tuser, output o_tlast, output o_tvalid, input o_tready +); + + localparam [1:0] ST_HEAD = 2'd0; + localparam [1:0] ST_TIME = 2'd1; + localparam [1:0] ST_BODY = 2'd2; + + reg [1:0] chdr_state; + + wire [127:0] hdr_i_tuser, hdr_o_tuser; + wire hdr_i_tvalid, hdr_i_tready; + wire hdr_o_tvalid, hdr_o_tready; + + wire [63:0] body_i_tdata, body_o_tdata; + wire body_i_tlast, body_o_tlast; + wire body_i_tvalid, body_o_tvalid; + wire body_i_tready, body_o_tready; + + wire has_time = i_tdata[61]; + reg [63:0] held_i_tdata; + reg second_half; + + assign body_i_tdata = i_tdata; + assign body_i_tlast = i_tlast; + assign body_i_tvalid = (chdr_state == ST_BODY) ? 
i_tvalid : 1'b0; + + assign hdr_i_tuser = (chdr_state == ST_HEAD) ? { i_tdata, i_tdata } : { held_i_tdata, i_tdata }; // 2nd half ignored if no time + assign hdr_i_tvalid = (chdr_state == ST_TIME) ? i_tvalid : + ((chdr_state == ST_HEAD) & ~has_time) ? i_tvalid : + 1'b0; + + assign i_tready = (chdr_state == ST_BODY) ? body_i_tready : hdr_i_tready; + + // FIXME handle packets with no body + always @(posedge pkt_clk) begin + if (pkt_rst) begin + chdr_state <= ST_HEAD; + end else begin + case(chdr_state) + ST_HEAD: + if (i_tvalid & hdr_i_tready) + if (has_time) begin + chdr_state <= ST_TIME; + held_i_tdata <= i_tdata; + end else begin + chdr_state <= ST_BODY; + end + ST_TIME: + if (i_tvalid & hdr_i_tready) + chdr_state <= ST_BODY; + ST_BODY: + if (i_tvalid & body_i_tready & i_tlast) + chdr_state <= ST_HEAD; + endcase + end + end + + wire pkt_rst_stretch; + pulse_stretch #(.SCALE('d10)) pkt_reset_i ( + .clk(pkt_clk), + .rst(1'b0), + .pulse(pkt_rst), + .pulse_stretched(pkt_rst_stretch) + ); + + axi_fifo_2clk #(.WIDTH(128), .SIZE(5)) hdr_fifo_i ( + .i_aclk(pkt_clk), .o_aclk(samp_clk), .reset(pkt_rst_stretch), + .i_tdata(hdr_i_tuser), .i_tvalid(hdr_i_tvalid), .i_tready(hdr_i_tready), + .o_tdata(hdr_o_tuser), .o_tvalid(hdr_o_tvalid), .o_tready(hdr_o_tready) + ); + + axi_fifo_2clk #(.WIDTH(65), .SIZE(9)) body_fifo ( + .i_aclk(pkt_clk), .o_aclk(samp_clk), .reset(pkt_rst_stretch), + .i_tdata({body_i_tlast, body_i_tdata}), .i_tvalid(body_i_tvalid), .i_tready(body_i_tready), + .o_tdata({body_o_tlast, body_o_tdata}), .o_tvalid(body_o_tvalid), .o_tready(body_o_tready) + ); + + wire odd_len = hdr_o_tuser[98] ^ |hdr_o_tuser[97:96]; + + generate + if (WIDTH == 32) begin : gen_32bit_output + // 32-bit Output + + always @(posedge samp_clk) begin + if(samp_rst) begin + second_half <= 1'b0; + end else begin + if(o_tvalid & o_tready) begin + if(o_tlast) + second_half <= 1'b0; + else + second_half <= ~second_half; + end + end + end + + assign o_tdata = second_half ? 
body_o_tdata[WIDTH-1:0] : body_o_tdata[(2*WIDTH)-1:WIDTH]; + assign o_tlast = body_o_tlast & (second_half | odd_len); + assign o_tuser = hdr_o_tuser; + assign o_tvalid = hdr_o_tvalid & body_o_tvalid; + + assign hdr_o_tready = o_tvalid & o_tready & o_tlast; + assign body_o_tready = o_tvalid & o_tready & (o_tlast | second_half); + + end else begin : gen_64bit_output + // 64-bit Output + + assign o_tdata = body_o_tdata; + assign o_tlast = body_o_tlast; + assign o_tuser = hdr_o_tuser; + assign o_tvalid = hdr_o_tvalid & body_o_tvalid; + + assign hdr_o_tready = o_tvalid & o_tready & o_tlast; + assign body_o_tready = o_tvalid & o_tready; + + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/chdr_fifo_large.v b/fpga/usrp3/lib/rfnoc/chdr_fifo_large.v new file mode 100644 index 000000000..77e1f8a0b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_fifo_large.v @@ -0,0 +1,94 @@ +// +// Copyright 2016 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module chdr_fifo_large #( + parameter SIZE = 12, + parameter DEVICE = "7SERIES" +) ( + input clk, + input reset, + input clear, + + input [63:0] i_tdata, + input i_tlast, + input i_tvalid, + output i_tready, + + output [63:0] o_tdata, + output o_tlast, + output o_tvalid, + input o_tready +); + + localparam SIZE_THRESHOLD = ( + (DEVICE == "7SERIES") ? 14 : ( + (DEVICE == "VIRTEX6") ? 14 : ( + (DEVICE == "SPARTAN6") ? 
12 : ( + 12 + )))); + + wire [63:0] i_tdata_pre; + wire i_tlast_pre, i_tvalid_pre, i_tready_pre; + + // SRL based FIFO to break timing paths to BRAM resources + axi_fifo_flop2 #(.WIDTH(65)) pre_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast, i_tdata}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({i_tlast_pre, i_tdata_pre}), .o_tvalid(i_tvalid_pre), .o_tready(i_tready_pre), + .space(), .occupied() + ); + + generate + if (SIZE <= SIZE_THRESHOLD) begin + wire [63:0] o_tdata_int; + wire o_tlast_int, o_tvalid_int, o_tready_int; + // Instantiate a single axi_fifo if size is not larger than threshold + axi_fifo #(.WIDTH(65), .SIZE(SIZE)) main_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast_pre, i_tdata_pre}), .i_tvalid(i_tvalid_pre), .i_tready(i_tready_pre), + .o_tdata({o_tlast_int, o_tdata_int}), .o_tvalid(o_tvalid_int), .o_tready(o_tready_int), + .space(), .occupied() + ); + axi_fifo_flop2 #(.WIDTH(65)) fifo_flop2 ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({o_tlast_int, o_tdata_int}), .i_tvalid(o_tvalid_int), .i_tready(o_tready_int), + .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready), + .space(), .occupied() + ); + end else begin + // Instantiate a cascade of axi_fifos if size is larger than threshold + localparam CDEPTH = 2**(SIZE - SIZE_THRESHOLD); //Cascade Depth + wire [63:0] c_tdata[CDEPTH:0], int_tdata[CDEPTH-1:0]; + wire c_tlast[CDEPTH:0], c_tvalid[CDEPTH:0], c_tready[CDEPTH:0]; + wire int_tlast[CDEPTH-1:0], int_tvalid[CDEPTH-1:0], int_tready[CDEPTH-1:0]; + + //Connect input to first cascade state + assign {c_tdata[0], c_tlast[0], c_tvalid[0]} = {i_tdata_pre, i_tlast_pre, i_tvalid_pre}; + assign i_tready_pre = c_tready[0]; + //Connect output to last cascade state + assign {o_tdata, o_tlast, o_tvalid} = {c_tdata[CDEPTH], c_tlast[CDEPTH], c_tvalid[CDEPTH]}; + assign c_tready[CDEPTH] = o_tready; + + genvar i; + for (i=0; i<CDEPTH; i=i+1) begin: fifo_stages + axi_fifo 
#(.WIDTH(65), .SIZE(SIZE_THRESHOLD)) main_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({c_tlast[i], c_tdata[i]}), .i_tvalid(c_tvalid[i]), .i_tready(c_tready[i]), + .o_tdata({int_tlast[i], int_tdata[i]}), .o_tvalid(int_tvalid[i]), .o_tready(int_tready[i]), + .space(), .occupied() + ); + axi_fifo_flop2 #(.WIDTH(65)) fifo_flop2 ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({int_tlast[i], int_tdata[i]}), .i_tvalid(int_tvalid[i]), .i_tready(int_tready[i]), + .o_tdata({c_tlast[i+1], c_tdata[i+1]}), .o_tvalid(c_tvalid[i+1]), .o_tready(c_tready[i+1]), + .space(), .occupied() + ); + end + end + endgenerate + +endmodule // chdr_fifo_large diff --git a/fpga/usrp3/lib/rfnoc/chdr_framer.v b/fpga/usrp3/lib/rfnoc/chdr_framer.v new file mode 100644 index 000000000..271c8e3d5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_framer.v @@ -0,0 +1,128 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// FIXME handle odd length inputs +// +// Warning: Currently only 32 / 64-bit input widths are supported. +// If 64-bit is selected, there will be a bubble state. +// + +module chdr_framer + #(parameter SIZE=10, + parameter WIDTH=32, // 32 or 64 only! TODO: Extend to other widths. 
+ parameter USE_SEQ_NUM=0) // Use provided seq number in tuser + (input clk, input reset, input clear, + input [WIDTH-1:0] i_tdata, input [127:0] i_tuser, input i_tlast, input i_tvalid, output i_tready, + output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + wire header_i_tvalid, header_i_tready; + wire [63:0] body_i_tdata; + wire body_i_tlast, body_i_tvalid, body_i_tready; + + wire [127:0] header_o_tdata; + wire header_o_tvalid, header_o_tready; + wire [63:0] body_o_tdata; + wire body_o_tlast, body_o_tvalid, body_o_tready; + reg [15:0] length; + reg [11:0] seqnum; + + assign i_tready = header_i_tready & body_i_tready; // Accept input only when both the header and body FIFOs can take data + assign header_i_tvalid = i_tlast & i_tvalid & i_tready; // One header entry per packet, written together with the last input word + assign body_i_tlast = i_tlast; + + // Handle 32 and 64 widths + generate + if (WIDTH == 32) begin + reg even; + always @(posedge clk) + if(reset | clear) + even <= 0; + else + if(i_tvalid & i_tready) + if(i_tlast) + even <= 0; + else + even <= ~even; + + reg [31:0] held_i_tdata; + always @(posedge clk) begin + if (i_tvalid & i_tready) held_i_tdata <= i_tdata; + end + assign body_i_tvalid = i_tvalid & i_tready & (i_tlast | even); // Write a 64-bit word on every second accepted input word, or on the last word + assign body_i_tdata = even ? { held_i_tdata, i_tdata } : {i_tdata, i_tdata}; // really should be 0 in bottom, but this simplifies mux + end else begin + assign body_i_tvalid = i_tvalid & i_tready; // Gate with i_tready: otherwise a stalled header FIFO lets the body FIFO store the same held word repeatedly + assign body_i_tdata = i_tdata; + end + endgenerate + + // FIXME handle lengths of partial 32-bit words + always @(posedge clk) + if(reset | clear) + length <= (WIDTH == 32) ? 4 : 8; + else if(header_i_tready & header_i_tvalid) + length <= (WIDTH == 32) ? 4 : 8; + else if(i_tvalid & i_tready) + length <= (WIDTH == 32) ? 
length + 4 : length + 8; + + axi_fifo_flop2 #(.WIDTH(128)) header_fifo_flop2 + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tuser[127:112],length,i_tuser[95:0]}), .i_tvalid(header_i_tvalid), .i_tready(header_i_tready), + .o_tdata(header_o_tdata), .o_tvalid(header_o_tvalid), .o_tready(header_o_tready), + .occupied(), .space()); + + axi_fifo #(.WIDTH(65), .SIZE(SIZE)) body_fifo + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({body_i_tlast,body_i_tdata}), .i_tvalid(body_i_tvalid), .i_tready(body_i_tready), + .o_tdata({body_o_tlast,body_o_tdata}), .o_tvalid(body_o_tvalid), .o_tready(body_o_tready), + .occupied(), .space()); + + reg [3:0] chdr_state; + localparam ST_IDLE = 0; + localparam ST_HEAD = 1; + localparam ST_TIME = 2; + localparam ST_BODY = 3; + + always @(posedge clk) + if(reset | clear) + chdr_state <= ST_IDLE; + else + case(chdr_state) + ST_IDLE : + if(header_o_tvalid & body_o_tvalid) + chdr_state <= ST_HEAD; + ST_HEAD : + if(o_tready) + if(header_o_tdata[125]) // time + chdr_state <= ST_TIME; + else + chdr_state <= ST_BODY; + ST_TIME : + if(o_tready) + chdr_state <= ST_BODY; + ST_BODY : + if(o_tready & body_o_tlast) + chdr_state <= ST_IDLE; + endcase // case (chdr_state) + + always @(posedge clk) + if(reset | clear) + seqnum <= 12'd0; + else + if(o_tvalid & o_tready & o_tlast) + seqnum <= seqnum + 12'd1; + + wire [15:0] out_length = header_o_tdata[111:96] + (header_o_tdata[125] ? 16'd16 : 16'd8); + + assign o_tvalid = (chdr_state == ST_HEAD) | (chdr_state == ST_TIME) | (body_o_tvalid & (chdr_state == ST_BODY)); + assign o_tlast = (chdr_state == ST_BODY) & body_o_tlast; + assign o_tdata = (chdr_state == ST_HEAD) ? {header_o_tdata[127:124], (USE_SEQ_NUM ? header_o_tdata[123:112] : seqnum), out_length, header_o_tdata[95:64] } : + (chdr_state == ST_TIME) ? 
header_o_tdata[63:0] : + body_o_tdata; + assign body_o_tready = (chdr_state == ST_BODY) & o_tready; + assign header_o_tready = ((chdr_state == ST_TIME) | ((chdr_state == ST_HEAD) & ~header_o_tdata[125])) & o_tready; + +endmodule // chdr_framer diff --git a/fpga/usrp3/lib/rfnoc/chdr_framer_2clk.v b/fpga/usrp3/lib/rfnoc/chdr_framer_2clk.v new file mode 100644 index 000000000..4c430fbff --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_framer_2clk.v @@ -0,0 +1,146 @@ +///////////////////////////////////////////////////////////////////// +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_framer_2clk +// Description: +// - Takes a sample stream in (samp_clk domain) and uses the tuser +// input to frame a CHDR packet, which is output by the module +// (pkt_clk domain) +// - FIXME Currently only 32 / 64-bit input widths are supported. +// +///////////////////////////////////////////////////////////////////// + +module chdr_framer_2clk #( + parameter SIZE = 10, + parameter WIDTH = 32, // 32 or 64 only! TODO: Extend to other widths. 
+ parameter USE_SEQ_NUM = 0 // Use provided seq number in tuser +) ( + input samp_clk, input samp_rst, input pkt_clk, input pkt_rst, + input [WIDTH-1:0] i_tdata, input [127:0] i_tuser, input i_tlast, input i_tvalid, output i_tready, + output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready +); + + wire header_i_tvalid, header_i_tready; + wire [63:0] body_i_tdata; + wire body_i_tlast, body_i_tvalid, body_i_tready; + + wire [127:0] header_o_tdata; + wire header_o_tvalid, header_o_tready; + wire [63:0] body_o_tdata; + wire body_o_tlast, body_o_tvalid, body_o_tready; + reg [15:0] length; + reg [11:0] seqnum; + + assign i_tready = header_i_tready & body_i_tready; + assign header_i_tvalid = i_tlast & i_tvalid & i_tready; + assign body_i_tlast = i_tlast; + + // Handle 32 and 64 widths + generate + if (WIDTH == 32) begin + reg even = 1'b0; + always @(posedge samp_clk) + if(samp_rst) + even <= 1'b0; + else + if(i_tvalid & i_tready) + if(i_tlast) + even <= 1'b0; + else + even <= ~even; + + reg [31:0] held_i_tdata; + always @(posedge samp_clk) begin + if (i_tvalid & i_tready) held_i_tdata <= i_tdata; + end + assign body_i_tvalid = i_tvalid & i_tready & (i_tlast | even); + assign body_i_tdata = even ? { held_i_tdata, i_tdata } : {i_tdata, i_tdata}; // really should be 0 in bottom, but this simplifies mux + end else begin + assign body_i_tvalid = i_tvalid & i_tready; + assign body_i_tdata = i_tdata; + end + endgenerate + + // FIXME handle lengths of partial 32-bit words + always @(posedge samp_clk) + if (samp_rst) + length <= (WIDTH == 32) ? 16'd4 : 16'd8; + else if(header_i_tready & header_i_tvalid) + length <= (WIDTH == 32) ? 16'd4 : 16'd8; + else if(i_tvalid & i_tready) + length <= (WIDTH == 32) ? length + 16'd4 : length + 16'd8; + + // Extended reset signal to ensure longer reset on axi_fifo_2clk + // as recommended by Xilinx. It clears all partial packets seen + // after clearing the fifos. 
+ // This pulse stretch ratio works in this case and may not work + // for all clocks. + wire samp_rst_stretch; + pulse_stretch #(.SCALE('d10)) samp_reset_i ( + .clk(samp_clk), + .rst(1'b0), + .pulse(samp_rst), + .pulse_stretched(samp_rst_stretch) + ); + + axi_fifo_2clk #(.WIDTH(128), .SIZE(5)) hdr_fifo_i ( + .i_aclk(samp_clk), .o_aclk(pkt_clk), .reset(samp_rst_stretch), + .i_tdata({i_tuser[127:112],length,i_tuser[95:0]}), .i_tvalid(header_i_tvalid), .i_tready(header_i_tready), + .o_tdata(header_o_tdata), .o_tvalid(header_o_tvalid), .o_tready(header_o_tready) + ); + + axi_fifo_2clk #(.WIDTH(65), .SIZE(SIZE)) body_fifo_i ( + .i_aclk(samp_clk), .o_aclk(pkt_clk), .reset(samp_rst_stretch), + .i_tdata({body_i_tlast,body_i_tdata}), .i_tvalid(body_i_tvalid), .i_tready(body_i_tready), + .o_tdata({body_o_tlast,body_o_tdata}), .o_tvalid(body_o_tvalid), .o_tready(body_o_tready) + ); + + reg [1:0] chdr_state; + localparam [1:0] ST_IDLE = 0; + localparam [1:0] ST_HEAD = 1; + localparam [1:0] ST_TIME = 2; + localparam [1:0] ST_BODY = 3; + + always @(posedge pkt_clk) + if(pkt_rst) + chdr_state <= ST_IDLE; + else + case(chdr_state) + ST_IDLE : + if(header_o_tvalid & body_o_tvalid) + chdr_state <= ST_HEAD; + ST_HEAD : + if(o_tready) + if(header_o_tdata[125]) // time + chdr_state <= ST_TIME; + else + chdr_state <= ST_BODY; + ST_TIME : + if(o_tready) + chdr_state <= ST_BODY; + ST_BODY : + if(o_tready & body_o_tlast) + chdr_state <= ST_IDLE; + endcase + + always @(posedge pkt_clk) + if(pkt_rst) + seqnum <= 12'd0; + else + if(o_tvalid & o_tready & o_tlast) + seqnum <= seqnum + 12'd1; + + wire [15:0] out_length = header_o_tdata[111:96] + (header_o_tdata[125] ? 16'd16 : 16'd8); + + assign o_tvalid = (chdr_state == ST_HEAD) | (chdr_state == ST_TIME) | (body_o_tvalid & (chdr_state == ST_BODY)); + assign o_tlast = (chdr_state == ST_BODY) & body_o_tlast; + assign o_tdata = (chdr_state == ST_HEAD) ? {header_o_tdata[127:124], (USE_SEQ_NUM == 1 ? 
header_o_tdata[123:112] : seqnum), out_length, header_o_tdata[95:64] } : + (chdr_state == ST_TIME) ? header_o_tdata[63:0] : + body_o_tdata; + assign body_o_tready = (chdr_state == ST_BODY) & o_tready; + assign header_o_tready = ((chdr_state == ST_TIME) | ((chdr_state == ST_HEAD) & ~header_o_tdata[125])) & o_tready; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/chdr_pkt_types.vh b/fpga/usrp3/lib/rfnoc/chdr_pkt_types.vh new file mode 100644 index 000000000..204390cfc --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/chdr_pkt_types.vh @@ -0,0 +1,11 @@ +// CHDR Packet types +// [2:1]: Type +// [0]: EOB +localparam [2:0] DATA_PKT = 3'b000; +localparam [2:0] DATA_EOB_PKT = 3'b001; +localparam [2:0] FC_RESP_PKT = 3'b010; +localparam [2:0] FC_ACK_PKT = 3'b011; +localparam [2:0] CMD_PKT = 3'b100; +localparam [2:0] CMD_EOB_PKT = 3'b101; // Unused +localparam [2:0] RESP_PKT = 3'b110; +localparam [2:0] RESP_ERR_PKT = 3'b111; diff --git a/fpga/usrp3/lib/rfnoc/cic_decimate.v b/fpga/usrp3/lib/rfnoc/cic_decimate.v new file mode 100644 index 000000000..6a723d88e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/cic_decimate.v @@ -0,0 +1,148 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module cic_decimate #( + parameter WIDTH = 16, + parameter N = 4, + parameter MAX_RATE = 256 +)( + input clk, + input reset, + input rate_stb, + input [$clog2(MAX_RATE+1)-1:0] rate, // +1 due to $clog2() rounding + input strobe_in, + output reg strobe_out, + input last_in, + output reg last_out, + input [WIDTH-1:0] signal_in, + output reg [WIDTH-1:0] signal_out +); + + wire [WIDTH+(N*$clog2(MAX_RATE+1))-1:0] signal_in_ext; + reg [WIDTH+(N*$clog2(MAX_RATE+1))-1:0] integrator [0:N-1]; + reg [WIDTH+(N*$clog2(MAX_RATE+1))-1:0] differentiator [0:N-1]; + reg [WIDTH+(N*$clog2(MAX_RATE+1))-1:0] pipeline [0:N-1]; + reg [WIDTH+(N*$clog2(MAX_RATE+1))-1:0] sampler; + reg [N-1:0] last_integ; + reg 
last_integ_hold; + reg [N-1:0] last_diff; + reg last_sampler; + reg [N-1:0] strobe_integ; + reg strobe_sampler; + reg [N-1:0] strobe_diff; + + integer i; + + sign_extend #(WIDTH,WIDTH+(N*$clog2(MAX_RATE+1))) ext_input (.in(signal_in),.out(signal_in_ext)); + + // Integrate + always @(posedge clk) begin + if (reset) begin + last_integ <= 0; + last_integ_hold <= 0; + for (i = 0; i < N; i = i + 1) begin + integrator[i] <= 0; + strobe_integ[i] <= 0; + end + end else begin + strobe_integ <= {strobe_integ[N-2:0],strobe_in}; + if (strobe_in) begin + last_integ[0] <= last_in; + integrator[0] <= integrator[0] + signal_in_ext; + end + for (i = 1; i < N; i = i + 1) begin + if (strobe_integ[i-1]) begin + last_integ[i] <= last_integ[i-1]; + integrator[i] <= integrator[i] + integrator[i-1]; + end + end + if (last_integ[N-1] & ~strobe_sampler) begin + last_integ_hold <= 1'b1; + end else if (strobe_sampler) begin + last_integ_hold <= 1'b0; + end + end + end + + // Sampler strobe + reg [$clog2(MAX_RATE+1)-1:0] counter; + always @(posedge clk) begin + if (reset) begin + counter <= rate; + strobe_sampler <= 1'b0; + last_sampler <= 1'b0; + sampler <= 'd0; + end else begin + strobe_sampler <= 1'b0; + last_sampler <= 1'b0; + if (rate_stb) begin + counter <= rate; + end else if (strobe_integ[N-1]) begin + if (counter <= 1) begin + counter <= rate; + strobe_sampler <= 1'b1; + last_sampler <= last_integ[N-1] | last_integ_hold; + sampler <= integrator[N-1]; + end else begin + counter <= counter - 1; + end + end + end + end + + // Differentiate + always @(posedge clk) begin + if (reset) begin + last_diff <= 0; + for (i = 0; i < N; i = i + 1) begin + pipeline[i] <= 0; + differentiator[i] <= 0; + strobe_diff <= 0; + end + end else begin + strobe_diff <= {strobe_diff[N-2:0], strobe_sampler}; + if (strobe_sampler) begin + last_diff[0] <= last_sampler; + differentiator[0] <= sampler; + pipeline[0] <= sampler - differentiator[0]; + end + for (i = 1; i < N; i = i + 1) begin + if (strobe_diff[i-1]) 
begin + last_diff[i] <= last_diff[i-1]; + differentiator[i] <= pipeline[i-1]; + pipeline[i] <= pipeline[i-1] - differentiator[i]; + end + end + end + end + + genvar l; + wire [WIDTH-1:0] signal_out_shifted[0:MAX_RATE]; + generate + for (l = 1; l <= MAX_RATE; l = l + 1) begin + // N*log2(rate), $clog2(rate) = ceil(log2(rate)) which rounds to nearest shift without overflow + assign signal_out_shifted[l] = pipeline[N-1][$clog2(l**N)+WIDTH-1:$clog2(l**N)]; + end + endgenerate + assign signal_out_shifted[0] = pipeline[N-1][WIDTH-1:0]; + + // Output register + always @(posedge clk) begin + if (reset) begin + last_out <= 1'b0; + strobe_out <= 1'b0; + signal_out <= 'd0; + end else begin + strobe_out <= strobe_diff[N-1]; + if (strobe_diff[N-1]) begin + last_out <= last_diff[N-1]; + signal_out <= signal_out_shifted[rate]; + end + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/cic_interpolate.v b/fpga/usrp3/lib/rfnoc/cic_interpolate.v new file mode 100644 index 000000000..d59e973fc --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/cic_interpolate.v @@ -0,0 +1,132 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module cic_interpolate #( + parameter WIDTH = 16, + parameter N = 4, + parameter MAX_RATE = 128 +)( + input clk, + input reset, + input rate_stb, + input [$clog2(MAX_RATE+1)-1:0] rate, // +1 due to $clog2() rounding + input strobe_in, + output reg strobe_out, + input [WIDTH-1:0] signal_in, + output reg [WIDTH-1:0] signal_out +); + + wire [WIDTH+$clog2(MAX_RATE**(N-1))-1:0] signal_in_ext; + reg [WIDTH+$clog2(MAX_RATE**(N-1))-1:0] integrator [0:N-1]; + reg [WIDTH+$clog2(MAX_RATE**(N-1))-1:0] differentiator [0:N-1]; + reg [WIDTH+$clog2(MAX_RATE**(N-1))-1:0] pipeline [0:N-1]; + reg [WIDTH+$clog2(MAX_RATE**(N-1))-1:0] sampler; + + reg [N-1:0] strobe_diff; + reg [N-1:0] strobe_integ; + reg strobe_sampler; + + integer i; + + sign_extend 
#(WIDTH,WIDTH+$clog2(MAX_RATE**(N-1))) ext_input (.in(signal_in),.out(signal_in_ext)); + + // Differentiate + always @(posedge clk) begin + if (reset) begin + strobe_diff <= 'd0; + for (i = 0; i < N; i = i + 1) begin + differentiator[i] <= 0; + pipeline[i] <= 0; + end + end else begin + strobe_diff <= {strobe_diff[N-2:0], strobe_in}; + if (strobe_in) begin + differentiator[0] <= signal_in_ext; + pipeline[0] <= signal_in_ext - differentiator[0]; + end + for (i = 1; i < N; i = i + 1) begin + if (strobe_diff[i-1]) begin + differentiator[i] <= pipeline[i-1]; + pipeline[i] <= pipeline[i-1] - differentiator[i]; + end + end + end + end + + // Strober + reg [$clog2(MAX_RATE+1)-1:0] counter; + wire strobe_out_int; + + always @(posedge clk) begin + if (reset | rate_stb) begin + counter <= rate; + end else if (strobe_diff[N-1]) begin + counter <= rate - 1; + end else begin + if (counter == 0) begin + counter <= rate; + end else if (counter < rate) begin + counter <= counter - 1; + end + end + end + + assign strobe_out_int = (counter < rate) & ~rate_stb; + + // Integrate + always @(posedge clk) begin + if (reset) begin + strobe_sampler <= 1'b0; + strobe_integ <= 'd0; + for (i = 0; i < N; i = i + 1) begin + integrator[i] <= 0; + end + end else begin + strobe_sampler <= strobe_diff[N-1]; + if (strobe_diff[N-1]) begin + sampler <= pipeline[N-1]; + end + strobe_integ <= {strobe_integ[N-2:0],strobe_out_int}; + if (strobe_sampler) begin + integrator[0] <= integrator[0] + sampler; + end + for (i = 1; i < N; i = i + 1) begin + if (strobe_integ[i-1]) begin + integrator[i] <= integrator[i] + integrator[i-1]; + end + end + end + end + + genvar l; + wire [WIDTH-1:0] signal_out_shifted[0:MAX_RATE]; + wire signal_out_shifted_strobe[0:MAX_RATE]; + generate + for (l = 0; l <= MAX_RATE; l = l + 1) begin + axi_round #( + .WIDTH_IN((l == 0 || l == 1) ? WIDTH : $clog2(l**(N-1))+WIDTH), + .WIDTH_OUT(WIDTH)) + axi_round ( + .clk(clk), .reset(reset), + .i_tdata((l == 0 || l == 1) ? 
integrator[N-1][WIDTH-1:0] : integrator[N-1][$clog2(l**(N-1))+WIDTH-1:0]), + .i_tlast(1'b0), .i_tvalid(strobe_integ[N-1]), .i_tready(), + .o_tdata(signal_out_shifted[l]), .o_tlast(), .o_tvalid(signal_out_shifted_strobe[l]), .o_tready(1'b1)); + end + endgenerate + + // Output register + always @(posedge clk) begin + if (reset) begin + strobe_out <= 1'b0; + signal_out <= 'd0; + end else begin + strobe_out <= signal_out_shifted_strobe[0]; // Any of the strobes will work here + signal_out <= signal_out_shifted[rate]; + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/cmul.v b/fpga/usrp3/lib/rfnoc/cmul.v new file mode 100644 index 000000000..1acb1a8cc --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/cmul.v @@ -0,0 +1,23 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This block exists only to wrap the Xilinx IP which has a different interface +// Xilinx puts Q in the high bits, I in the low bits, and inverts reset + +module cmul + (input clk, input reset, + input [31:0] a_tdata, input a_tlast, input a_tvalid, output a_tready, + input [31:0] b_tdata, input b_tlast, input b_tvalid, output b_tready, + output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + complex_multiplier complex_multiplier + (.aclk(clk), .aresetn(~reset), + .s_axis_a_tdata({a_tdata[15:0], a_tdata[31:16]}), .s_axis_a_tlast(a_tlast), .s_axis_a_tvalid(a_tvalid), .s_axis_a_tready(a_tready), + .s_axis_b_tdata({b_tdata[15:0], b_tdata[31:16]}), .s_axis_b_tlast(b_tlast), .s_axis_b_tvalid(b_tvalid), .s_axis_b_tready(b_tready), + .s_axis_ctrl_tdata(8'd0), .s_axis_ctrl_tvalid(1'b1), .s_axis_ctrl_tready(), + .m_axis_dout_tdata({o_tdata[31:0], o_tdata[63:32]}), .m_axis_dout_tlast(o_tlast), .m_axis_dout_tvalid(o_tvalid), .m_axis_dout_tready(o_tready)); + +endmodule // cmul diff --git a/fpga/usrp3/lib/rfnoc/complex_invert.v b/fpga/usrp3/lib/rfnoc/complex_invert.v new file mode 
100644 index 000000000..800b5dc97 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/complex_invert.v @@ -0,0 +1,157 @@
//
// Copyright 2015 Ettus Research LLC
//
// Complex reciprocal. General complex invert algorithm:
//
//     1        1      a - bi     a - bi          a           bi
//   ------ = ------ * ------ = ----------- = --------- - ---------
//   a + bi   a + bi   a - bi    a^2 + b^2    a^2 + b^2   a^2 + b^2
//
// Data path, as wired below:
//   1. The input stream is fanned out three ways: a = bits [31:16],
//      b = bits [15:0], and the full word for the a^2 + b^2 calculation.
//   2. a^2 + b^2 is computed once and fanned out to two dividers.
//   3. One divider forms a / (a^2 + b^2), the other -b / (a^2 + b^2).
//      A quotient is forced to zero when its divider flags divide-by-zero.
//   4. The low 32 bits of both quotients are joined, then rounded and clipped
//      to a complex int16.
//

module complex_invert
(
  input clk, input reset, input clear,
  input [31:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
  output [31:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  //--------------------------------------------------------------------------
  // Stage 1: fan the input out into three streams. The FIFOs inside the
  // splitter account for the differing latency of the three paths.
  //--------------------------------------------------------------------------
  wire [31:0] re_word_tdata, im_word_tdata, iq_tdata;
  wire        re_tlast, re_tvalid, re_tready;   // "a" path
  wire        im_tlast, im_tvalid, im_tready;   // "b" path
  wire        iq_tlast, iq_tvalid, iq_tready;   // a^2 + b^2 path

  split_stream_fifo #(
    .WIDTH       (32),
    .ACTIVE_MASK (4'b0111),
    .FIFO_SIZE   (5))
  input_split_stream_fifo0 (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready),
    .o0_tdata(re_word_tdata), .o0_tlast(re_tlast), .o0_tvalid(re_tvalid), .o0_tready(re_tready),
    .o1_tdata(im_word_tdata), .o1_tlast(im_tlast), .o1_tvalid(im_tvalid), .o1_tready(im_tready),
    .o2_tdata(iq_tdata),      .o2_tlast(iq_tlast), .o2_tvalid(iq_tvalid), .o2_tready(iq_tready),
    .o3_tdata(), .o3_tlast(), .o3_tvalid(), .o3_tready(1'b0));

  // Pick the two 16-bit components out of their respective copies
  wire [15:0] re_tdata = re_word_tdata[31:16];  // a
  wire [15:0] im_tdata = im_word_tdata[15:0];   // b

  //--------------------------------------------------------------------------
  // Stage 2: a^2 + b^2, duplicated for the two dividers
  //--------------------------------------------------------------------------
  wire [31:0] magsq_tdata;
  wire        magsq_tlast, magsq_tvalid, magsq_tready;

  complex_to_magsq
  a2_p_b2_complex_to_magsq (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata(iq_tdata), .i_tlast(iq_tlast), .i_tvalid(iq_tvalid), .i_tready(iq_tready),
    .o_tdata(magsq_tdata), .o_tlast(magsq_tlast), .o_tvalid(magsq_tvalid), .o_tready(magsq_tready));

  wire [31:0] magsq_re_tdata, magsq_im_tdata;
  wire        magsq_re_tlast, magsq_re_tvalid, magsq_re_tready;  // divisor for a
  wire        magsq_im_tlast, magsq_im_tvalid, magsq_im_tready;  // divisor for -b

  split_stream_fifo #(
    .WIDTH       (32),
    .ACTIVE_MASK (4'b0011),
    .FIFO_SIZE   (5))
  input_split_stream_fifo1 (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata(magsq_tdata), .i_tlast(magsq_tlast), .i_tvalid(magsq_tvalid), .i_tready(magsq_tready),
    .o0_tdata(magsq_re_tdata), .o0_tlast(magsq_re_tlast), .o0_tvalid(magsq_re_tvalid), .o0_tready(magsq_re_tready),
    .o1_tdata(magsq_im_tdata), .o1_tlast(magsq_im_tlast), .o1_tvalid(magsq_im_tvalid), .o1_tready(magsq_im_tready),
    .o2_tdata(), .o2_tlast(), .o2_tvalid(), .o2_tready(1'b0),
    .o3_tdata(), .o3_tlast(), .o3_tvalid(), .o3_tready(1'b0));

  //--------------------------------------------------------------------------
  // Stage 3a:      a
  //            ---------
  //            a^2 + b^2
  //
  // Quotient layout: signed bit, 15 integer bits, fraction sign bit,
  // 31 fraction bits.
  // Warning: Divider does not sign extend fractional part into the integer
  // part, although we throw away the integer part so this issue does not
  // affect our design.
  //--------------------------------------------------------------------------
  wire        re_div_by_zero;
  wire [47:0] re_quot_raw;
  wire        re_quot_tlast, re_quot_tvalid, re_quot_tready;

  divide_int16_int32
  a_div_a2_plus_b2_divider (
    .aclk(clk), .aresetn(~reset),
    .s_axis_divisor_tdata(magsq_re_tdata), .s_axis_divisor_tlast(magsq_re_tlast),
    .s_axis_divisor_tvalid(magsq_re_tvalid), .s_axis_divisor_tready(magsq_re_tready),
    .s_axis_dividend_tdata(re_tdata), .s_axis_dividend_tlast(re_tlast),
    .s_axis_dividend_tvalid(re_tvalid), .s_axis_dividend_tready(re_tready),
    .m_axis_dout_tdata(re_quot_raw), .m_axis_dout_tlast(re_quot_tlast),
    .m_axis_dout_tvalid(re_quot_tvalid), .m_axis_dout_tready(re_quot_tready),
    .m_axis_dout_tuser(re_div_by_zero));

  // Replace the quotient with zero when the divider reports divide-by-zero
  wire [47:0] re_quot = re_div_by_zero ? 48'd0 : re_quot_raw;

  //--------------------------------------------------------------------------
  // Stage 3b: negate b (saturating the most negative value to +32767) and
  // register it together with its tlast.
  //--------------------------------------------------------------------------
  wire [15:0] im_negated = (im_tdata == 16'h8000) ? 16'h7FFF : (~im_tdata + 1'b1);

  wire [15:0] im_neg_tdata;
  wire        im_neg_tlast, im_neg_tvalid, im_neg_tready;

  axi_fifo_flop #(.WIDTH(17))
  neg_b_axi_fifo_flop (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata({im_tlast, im_negated}), .i_tvalid(im_tvalid), .i_tready(im_tready),
    .o_tdata({im_neg_tlast, im_neg_tdata}), .o_tvalid(im_neg_tvalid), .o_tready(im_neg_tready),
    .space(), .occupied());

  //--------------------------------------------------------------------------
  // Stage 3c:     -b
  //            ---------
  //            a^2 + b^2
  //--------------------------------------------------------------------------
  wire        im_div_by_zero;
  wire [47:0] im_quot_raw;
  wire        im_quot_tlast, im_quot_tvalid, im_quot_tready;

  divide_int16_int32
  neg_b_div_a2_plus_b2_divider (
    .aclk(clk), .aresetn(~reset),
    .s_axis_divisor_tdata(magsq_im_tdata), .s_axis_divisor_tlast(magsq_im_tlast),
    .s_axis_divisor_tvalid(magsq_im_tvalid), .s_axis_divisor_tready(magsq_im_tready),
    .s_axis_dividend_tdata(im_neg_tdata), .s_axis_dividend_tlast(im_neg_tlast),
    .s_axis_dividend_tvalid(im_neg_tvalid), .s_axis_dividend_tready(im_neg_tready),
    .m_axis_dout_tdata(im_quot_raw), .m_axis_dout_tlast(im_quot_tlast),
    .m_axis_dout_tvalid(im_quot_tvalid), .m_axis_dout_tready(im_quot_tready),
    .m_axis_dout_tuser(im_div_by_zero));

  wire [47:0] im_quot = im_div_by_zero ? 48'd0 : im_quot_raw;

  //--------------------------------------------------------------------------
  // Stage 4: join both quotients into one word, then round and clip.
  // Throw away integer part as the result will always be a fraction due to
  // a^2 + b^2 > a (or b).
  //--------------------------------------------------------------------------
  wire [63:0] recip_tdata = {re_quot[31:0], im_quot[31:0]};
  wire        recip_tlast, recip_tvalid, recip_tready;

  axi_join #(
    .INPUTS(2))
  inst_axi_join (
    .i_tlast({re_quot_tlast, im_quot_tlast}),
    .i_tvalid({re_quot_tvalid, im_quot_tvalid}),
    .i_tready({re_quot_tready, im_quot_tready}),
    .o_tlast(recip_tlast), .o_tvalid(recip_tvalid), .o_tready(recip_tready));

  // Truncate to a complex int16
  axi_round_and_clip_complex #(
    .WIDTH_IN(32),
    .WIDTH_OUT(16),
    .CLIP_BITS(11), // Calibrated value
    .FIFOSIZE())
  inst_axi_round_and_clip_complex (
    .clk(clk), .reset(reset),
    .i_tdata(recip_tdata), .i_tlast(recip_tlast), .i_tvalid(recip_tvalid), .i_tready(recip_tready),
    .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready));

endmodule
diff --git
a/fpga/usrp3/lib/rfnoc/complex_to_mag_approx.v b/fpga/usrp3/lib/rfnoc/complex_to_mag_approx.v new file mode 100644 index 000000000..7455afcad --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/complex_to_mag_approx.v @@ -0,0 +1,136 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Fast magnitude approximation.
//
// ALPHA_DENOM & BETA_DENOM should be a power of 2
// Multiplierless if ALPHA_NUM & BETA_NUM are 1
//
// Mag ~= Alpha * max(|I|, |Q|) + Beta * min(|I|, |Q|)
//
// (table taken from http://www.dspguru.com/dsp/tricks/magnitude-estimator)
// =========================================
//   Alpha    Beta     Avg Err    RMS    Peak
//                     (linear)   (dB)   (dB)
// -----------------------------------------
//   1,       1/2     -0.086775  -20.7  -18.6
//   1,       1/4      0.006456  -27.6  -18.7
//   1,       11/32   -0.028505  -28.0  -24.8
//   1,       3/8     -0.040159  -26.4  -23.4
//   15/16,   15/32   -0.018851  -29.2  -24.1
//   15/16,   1/2     -0.030505  -26.9  -24.1
//   31/32,   11/32   -0.000371  -31.6  -22.9
//   31/32,   3/8     -0.012024  -31.4  -26.1
//   61/64,   3/8      0.002043  -32.5  -24.3
//   61/64,   13/32    0.009611  -31.8  -26.6
// =========================================
//
// Input: Complex, Output: Unsigned Int
//
// Parameters:
//   ALPHA_NUM / ALPHA_DENOM : Alpha as a fraction. The division is done by
//                             dropping the low LOG2(ALPHA_DENOM) bits of max.
//   BETA_NUM  / BETA_DENOM  : Beta as a fraction, applied to min the same way.
//   LATENCY                 : 0, 1, 2, or 3. Selects how many of the three
//                             axi_fifo_flop stages are in the data path:
//                               3: |.| -> flop0 -> max/min -> flop1 -> mag -> flop2
//                               2: |.| -> max/min -> flop1 -> mag -> flop2
//                               1: |.| -> max/min -> mag -> flop2
//                               0: fully combinational
//   SAMP_WIDTH              : Width of each of I and Q; also the output width.

`ifndef LOG2
`define LOG2(N) ( \
    N < 2    ? 0 : \
    N < 4    ? 1 : \
    N < 8    ? 2 : \
    N < 16   ? 3 : \
    N < 32   ? 4 : \
    N < 64   ? 5 : \
    N < 128  ? 6 : \
    N < 256  ? 7 : \
    N < 512  ? 8 : \
    N < 1024 ? 9 : \
    10)
`endif

module complex_to_mag_approx #(
  parameter ALPHA_NUM   = 1,
  parameter ALPHA_DENOM = 1,
  parameter BETA_NUM    = 1,
  parameter BETA_DENOM  = 4,
  parameter LATENCY     = 3,  // 0, 1, 2, or 3
  parameter SAMP_WIDTH  = 16)
(
  input clk, input reset, input clear,
  input [2*SAMP_WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
  output [SAMP_WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
);

  wire [2*SAMP_WIDTH-1:0] pipeline_i_tdata[0:2], pipeline_o_tdata[0:2];
  wire [2:0] pipeline_i_tvalid, pipeline_i_tlast, pipeline_i_tready;
  wire [2:0] pipeline_o_tvalid, pipeline_o_tlast, pipeline_o_tready;
  wire signed [SAMP_WIDTH-1:0] i, q, max, max_int, min, min_int;
  wire [SAMP_WIDTH-1:0] i_abs, q_abs, i_abs_int, q_abs_int, mag;


  // Absolute value (two's complement negate when the sign bit is set)
  assign i         = i_tdata[2*SAMP_WIDTH-1:SAMP_WIDTH];
  assign q         = i_tdata[SAMP_WIDTH-1:0];
  assign i_abs_int = i[SAMP_WIDTH-1] ? (~i + 1'b1) : i;
  assign q_abs_int = q[SAMP_WIDTH-1] ? (~q + 1'b1) : q;


  // First stage pipeline
  assign pipeline_i_tdata[0]  = {i_abs_int,q_abs_int};
  assign pipeline_i_tlast[0]  = i_tlast;
  assign pipeline_i_tvalid[0] = i_tvalid;
  assign pipeline_o_tready[0] = pipeline_i_tready[1];

  axi_fifo_flop #(.WIDTH(SAMP_WIDTH*2+1))
  pipeline0_axi_fifo_flop (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata({pipeline_i_tlast[0],pipeline_i_tdata[0]}), .i_tvalid(pipeline_i_tvalid[0]), .i_tready(pipeline_i_tready[0]),
    .o_tdata({pipeline_o_tlast[0],pipeline_o_tdata[0]}), .o_tvalid(pipeline_o_tvalid[0]), .o_tready(pipeline_o_tready[0]));


  // Max & Min
  // With LATENCY == 3 the absolute values come from the first pipeline stage;
  // for every other setting that stage is bypassed and the combinational
  // absolute values are used directly. (The bypass operand must be the *_int
  // net: feeding i_abs / q_abs back into themselves would create a
  // combinational loop with no driver for LATENCY < 3.)
  assign i_abs   = (LATENCY == 3) ? pipeline_o_tdata[0][2*SAMP_WIDTH-1:SAMP_WIDTH] : i_abs_int;
  assign q_abs   = (LATENCY == 3) ? pipeline_o_tdata[0][SAMP_WIDTH-1:0]            : q_abs_int;
  assign max_int = (i_abs > q_abs) ? i_abs : q_abs;
  assign min_int = (i_abs > q_abs) ? q_abs : i_abs;


  // Second stage pipeline
  // When LATENCY == 2 this is the first stage in the path, so it takes its
  // handshake directly from the module input.
  assign pipeline_i_tdata[1]  = {max_int,min_int};
  assign pipeline_i_tlast[1]  = (LATENCY == 2) ? i_tlast  : pipeline_o_tlast[0];
  assign pipeline_i_tvalid[1] = (LATENCY == 2) ? i_tvalid : pipeline_o_tvalid[0];
  assign pipeline_o_tready[1] = pipeline_i_tready[2];

  axi_fifo_flop #(.WIDTH(SAMP_WIDTH*2+1))
  pipeline1_axi_fifo_flop (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata({pipeline_i_tlast[1],pipeline_i_tdata[1]}), .i_tvalid(pipeline_i_tvalid[1]), .i_tready(pipeline_i_tready[1]),
    .o_tdata({pipeline_o_tlast[1],pipeline_o_tdata[1]}), .o_tvalid(pipeline_o_tvalid[1]), .o_tready(pipeline_o_tready[1]));


  // Magnitude Approx
  // Division by the (power of 2) denominators is a right shift, written here
  // as zero padding on top of the retained upper bits.
  assign max = (LATENCY >= 2) ? pipeline_o_tdata[1][2*SAMP_WIDTH-1:SAMP_WIDTH] : max_int;
  assign min = (LATENCY >= 2) ? pipeline_o_tdata[1][SAMP_WIDTH-1:0]            : min_int;
  assign mag = ALPHA_NUM * {{`LOG2(ALPHA_DENOM){1'b0}},max[SAMP_WIDTH-1:`LOG2(ALPHA_DENOM)]} +
               BETA_NUM  * {{`LOG2( BETA_DENOM){1'b0}},min[SAMP_WIDTH-1:`LOG2( BETA_DENOM)]};


  // Third stage pipeline
  // When LATENCY == 1 this is the only stage in the path, so it takes its
  // handshake directly from the module input.
  assign pipeline_i_tdata[2][SAMP_WIDTH-1:0] = mag;
  assign pipeline_i_tlast[2]  = (LATENCY == 1) ? i_tlast  : pipeline_o_tlast[1];
  assign pipeline_i_tvalid[2] = (LATENCY == 1) ? i_tvalid : pipeline_o_tvalid[1];
  assign pipeline_o_tready[2] = o_tready;

  axi_fifo_flop #(.WIDTH(SAMP_WIDTH+1))
  pipeline2_axi_fifo_flop (
    .clk(clk), .reset(reset), .clear(clear),
    .i_tdata({pipeline_i_tlast[2],pipeline_i_tdata[2][SAMP_WIDTH-1:0]}), .i_tvalid(pipeline_i_tvalid[2]), .i_tready(pipeline_i_tready[2]),
    .o_tdata({pipeline_o_tlast[2],pipeline_o_tdata[2][SAMP_WIDTH-1:0]}), .o_tvalid(pipeline_o_tvalid[2]), .o_tready(pipeline_o_tready[2]));


  // Output based on LATENCY mux
  // i_tready is taken from whichever stage is first in the selected path.
  assign o_tdata  = (LATENCY == 0) ? mag      : pipeline_o_tdata[2][SAMP_WIDTH-1:0];
  assign o_tlast  = (LATENCY == 0) ? i_tlast  : pipeline_o_tlast[2];
  assign o_tvalid = (LATENCY == 0) ? i_tvalid : pipeline_o_tvalid[2];
  assign i_tready = (LATENCY == 0) ? o_tready :
                    (LATENCY == 1) ? pipeline_i_tready[2] :
                    (LATENCY == 2) ? pipeline_i_tready[1] : pipeline_i_tready[0];

endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/complex_to_magsq.v b/fpga/usrp3/lib/rfnoc/complex_to_magsq.v new file mode 100644 index 000000000..8f0d5ea47 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/complex_to_magsq.v @@ -0,0 +1,57 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Magnitude squared of a complex stream: o = i^2 + q^2.
//
// The input is split into its two components. The first component is squared
// by a multiplier; the second is squared and summed with that result by a
// multiply-add.
//
// Parameters:
//   WIDTH : Width of each component. Input and output words are 2*WIDTH bits.
//
// NOTE(review): the clear input is not connected to anything in this module.

module complex_to_magsq #(
  parameter WIDTH = 16)
(
  input clk, input reset, input clear,
  input [2*WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
  output [2*WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  // Component streams out of the splitter
  wire [WIDTH-1:0]   i_part_tdata;
  wire               i_part_tlast, i_part_tvalid, i_part_tready;
  wire [WIDTH-1:0]   q_part_tdata;
  wire               q_part_tlast, q_part_tvalid, q_part_tready;

  // i^2, fed to the C input of the multiply-add
  wire [2*WIDTH-1:0] i_squared_tdata;
  wire               i_squared_tlast, i_squared_tvalid, i_squared_tready;

  split_complex #(.WIDTH(WIDTH)) split_complex (
    .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready),
    .oi_tdata(i_part_tdata), .oi_tlast(i_part_tlast), .oi_tvalid(i_part_tvalid), .oi_tready(i_part_tready),
    .oq_tdata(q_part_tdata), .oq_tlast(q_part_tlast), .oq_tvalid(q_part_tvalid), .oq_tready(q_part_tready),
    .error());

  // i^2
  // Both multiplier inputs are driven from the same stream, so only the A
  // side's tready is used for back-pressure.
  mult #(
    .WIDTH_A     (WIDTH),
    .WIDTH_B     (WIDTH),
    .WIDTH_P     (2*WIDTH),
    .DROP_TOP_P  (5),
    .LATENCY     (2),  // NOTE: If using CASCADE_OUT, set to 3
    .CASCADE_OUT (0))  // FIXME can use cascade once we get ISE to accept it
  i_sq_mult (
    .clk(clk), .reset(reset),
    .a_tdata(i_part_tdata), .a_tlast(i_part_tlast), .a_tvalid(i_part_tvalid), .a_tready(i_part_tready),
    .b_tdata(i_part_tdata), .b_tlast(i_part_tlast), .b_tvalid(i_part_tvalid), .b_tready(),
    .p_tdata(i_squared_tdata), .p_tlast(i_squared_tlast), .p_tvalid(i_squared_tvalid), .p_tready(i_squared_tready));

  // q^2 + i^2
  mult_add #(
    .WIDTH_A     (WIDTH),
    .WIDTH_B     (WIDTH),
    .WIDTH_P     (2*WIDTH),
    .DROP_TOP_P  (5),
    .LATENCY     (4),
    .CASCADE_IN  (0),  // FIXME this can be 1 once we get ISE to accept cascading
    .CASCADE_OUT (0))
  q_sq_mult (
    .clk(clk), .reset(reset),
    .a_tdata(q_part_tdata), .a_tlast(q_part_tlast), .a_tvalid(q_part_tvalid), .a_tready(q_part_tready),
    .b_tdata(q_part_tdata), .b_tlast(q_part_tlast), .b_tvalid(q_part_tvalid), .b_tready(),
    .c_tdata(i_squared_tdata), .c_tlast(i_squared_tlast), .c_tvalid(i_squared_tvalid), .c_tready(i_squared_tready),
    .p_tdata(o_tdata), .p_tlast(o_tlast), .p_tvalid(o_tvalid), .p_tready(o_tready));

endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/conj.v b/fpga/usrp3/lib/rfnoc/conj.v new file mode 100644 index 000000000..85d78f1d1 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/conj.v @@ -0,0 +1,20 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Complex conjugate: the upper half of the word passes through and the lower
// half is negated. Purely combinational; tlast/tvalid/tready pass straight
// through.
//
// NOTE -- does not flop the output. could cause timing issues, so follow with axi_fifo_flop if you need it

module conj
  #(parameter WIDTH=16)
  (input clk, input reset, input clear,
   input [2*WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
   output [2*WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  // Upper half is forwarded untouched
  wire [WIDTH-1:0] upper_half = i_tdata[2*WIDTH-1:WIDTH];
  // Lower half is negated (WIDTH-bit two's complement, no saturation)
  wire [WIDTH-1:0] lower_negated = -i_tdata[WIDTH-1:0];

  assign o_tdata  = {upper_half, lower_negated};

  // Handshake is a straight wire-through
  assign i_tready = o_tready;
  assign o_tvalid = i_tvalid;
  assign o_tlast  = i_tlast;

endmodule // conj
diff --git a/fpga/usrp3/lib/rfnoc/const.v b/fpga/usrp3/lib/rfnoc/const.v new file mode 100644 index 000000000..0345ad406 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/const.v @@ -0,0 +1,30 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Constant source. Continuously outputs the most recent word received on the
// config stream (zero after reset). The config stream is always accepted.

module const
  #(parameter WIDTH=32)
  (input clk, input reset,
   input [WIDTH-1:0] config_tdata, input config_tlast, input config_tvalid, output config_tready,
   output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  reg [WIDTH-1:0] const_val;

  // Never back-pressure the configuration stream
  assign config_tready = 1'b1;
  // FIXME do we want to sync constant change to tlasts?

  // A config word is taken on every completed handshake
  wire load_const = config_tvalid & config_tready;

  always @(posedge clk) begin
    if (reset) begin
      const_val <= {WIDTH{1'b0}};
    end else if (load_const) begin
      const_val <= config_tdata;
    end
  end

  assign o_tdata  = const_val;
  assign o_tlast  = 1'b0;  // FIXME do we want something else here?
  assign o_tvalid = 1'b1;  // caution -- will fill up a fifo

endmodule // const
diff --git a/fpga/usrp3/lib/rfnoc/const_sreg.v b/fpga/usrp3/lib/rfnoc/const_sreg.v new file mode 100644 index 000000000..1bb9d711c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/const_sreg.v @@ -0,0 +1,24 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Constant source whose value is held in a setting_reg at address BASE on the
// settings bus. The output stream is always valid and never asserts tlast.

module const_sreg
  #(parameter BASE=0,
    parameter WIDTH=32)
  (input clk, input reset,
   input set_stb, input [7:0] set_addr, input [31:0] set_data,
   output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  // Register contents, driven straight onto the output data bus
  wire [WIDTH-1:0] const_val;

  setting_reg #(.my_addr(BASE), .width(WIDTH)) reg_max (
    .clk    (clk),
    .rst    (reset),
    .strobe (set_stb),
    .addr   (set_addr),
    .in     (set_data),
    .out    (const_val));

  assign o_tvalid = 1'b1;  // caution -- will fill up a fifo
  assign o_tlast  = 1'b0;  // FIXME do we want something else here?
  assign o_tdata  = const_val;

endmodule // const_sreg
diff --git a/fpga/usrp3/lib/rfnoc/core/Makefile.srcs b/fpga/usrp3/lib/rfnoc/core/Makefile.srcs new file mode 100644 index 000000000..0a646f98b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/Makefile.srcs @@ -0,0 +1,39 @@ +# +# Copyright 2018 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Core Sources +################################################## +RFNOC_CORE_HEADERS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/core/, \ +rfnoc_chdr_utils.vh \ +rfnoc_axis_ctrl_utils.vh \ +rfnoc_chdr_internal_utils.vh \ +ctrlport.vh \ +)) + +RFNOC_CORE_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/core/, \ +axis_ctrl_endpoint.v \ +axis_ctrl_master.v \ +axis_ctrl_slave.v \ +chdr_compute_tkeep.v \ +chdr_to_chdr_data.v \ +chdr_to_axis_pyld_ctxt.v \ +chdr_to_axis_data_mdata.v \ +chdr_to_axis_data.v \ +axis_pyld_ctxt_to_chdr.v \
+axis_data_mdata_to_chdr.v \ +axis_data_to_chdr.v \ +chdr_ingress_fifo.v \ +chdr_mgmt_pkt_handler.v \ +chdr_data_swapper.v \ +chdr_stream_endpoint.v \ +chdr_stream_input.v \ +chdr_stream_output.v \ +chdr_to_axis_ctrl.v \ +ctrlport_endpoint.v \ +backend_iface.v \ +rfnoc_core_kernel.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/core/axis_ctrl_endpoint.v b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_endpoint.v new file mode 100644 index 000000000..e1ded42aa --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_endpoint.v @@ -0,0 +1,116 @@
//
// Copyright 2018-2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_ctrl_endpoint
// Description:
//   A bidirectional AXIS-Control to AXIS-Control converter.
//   Use this module in noc_shell to interface between the user
//   logic and the rfnoc infrastructure when both interfaces use
//   AXIS-Control.
//
//   Framework -> user: s_rfnoc_ctrl_* passes through the "in" FIFO and a
//   one-deep pipeline FIFO to m_axis_ctrl_*.
//   User -> framework: s_axis_ctrl_* passes through the "out" FIFO to
//   m_rfnoc_ctrl_*.
//
// Parameters:
//   - SYNC_CLKS: Is rfnoc_ctrl_clk and axis_ctrl_clk the same clock?
//     When set, single-clock FIFOs clocked by axis_ctrl_clk are used;
//     otherwise dual-clock FIFOs cross between the two domains.
//   - SLAVE_FIFO_SIZE: The depth of the slave FIFO. Note that the
//     slave FIFO will also buffer master responses.
//
// Signals:
//   - *_rfnoc_ctrl_* : Input/output AXIS-Control from/to the framework
//   - *_axis_ctrl_*  : Input/output AXIS-Control from/to the user

module axis_ctrl_endpoint #(
  parameter SYNC_CLKS       = 0,
  parameter SLAVE_FIFO_SIZE = 5
)(
  // Clocks, Resets, Misc
  input  wire        rfnoc_ctrl_clk,
  input  wire        rfnoc_ctrl_rst,
  input  wire        axis_ctrl_clk,
  input  wire        axis_ctrl_rst,
  // AXIS-Control Bus (RFNoC infrastructure)
  input  wire [31:0] s_rfnoc_ctrl_tdata,
  input  wire        s_rfnoc_ctrl_tlast,
  input  wire        s_rfnoc_ctrl_tvalid,
  output wire        s_rfnoc_ctrl_tready,
  output wire [31:0] m_rfnoc_ctrl_tdata,
  output wire        m_rfnoc_ctrl_tlast,
  output wire        m_rfnoc_ctrl_tvalid,
  input  wire        m_rfnoc_ctrl_tready,
  // AXIS-Control Bus (User logic)
  input  wire [31:0] s_axis_ctrl_tdata,
  input  wire        s_axis_ctrl_tlast,
  input  wire        s_axis_ctrl_tvalid,
  output wire        s_axis_ctrl_tready,
  output wire [31:0] m_axis_ctrl_tdata,
  output wire        m_axis_ctrl_tlast,
  output wire        m_axis_ctrl_tvalid,
  input  wire        m_axis_ctrl_tready
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_axis_ctrl_utils.vh"

  // ---------------------------------------------------
  //  Clock Crossing
  // ---------------------------------------------------

  // Framework-to-user stream after the "in" FIFO, in the axis_ctrl_clk domain
  wire [31:0] xing_tdata;
  wire        xing_tlast;
  wire        xing_tvalid;
  wire        xing_tready;

  generate
    if (SYNC_CLKS) begin
      // Same clock on both sides: plain single-clock FIFOs
      axi_fifo #(
        .WIDTH (32+1),
        .SIZE  (SLAVE_FIFO_SIZE)
      ) in_fifo_i (
        .clk      (axis_ctrl_clk),
        .reset    (axis_ctrl_rst),
        .clear    (1'b0),
        .i_tdata  ({s_rfnoc_ctrl_tlast, s_rfnoc_ctrl_tdata}),
        .i_tvalid (s_rfnoc_ctrl_tvalid),
        .i_tready (s_rfnoc_ctrl_tready),
        .o_tdata  ({xing_tlast, xing_tdata}),
        .o_tvalid (xing_tvalid),
        .o_tready (xing_tready),
        .space    (),
        .occupied ()
      );

      axi_fifo #(
        .WIDTH (32+1),
        .SIZE  (1)
      ) out_fifo_i (
        .clk      (axis_ctrl_clk),
        .reset    (axis_ctrl_rst),
        .clear    (1'b0),
        .i_tdata  ({s_axis_ctrl_tlast, s_axis_ctrl_tdata}),
        .i_tvalid (s_axis_ctrl_tvalid),
        .i_tready (s_axis_ctrl_tready),
        .o_tdata  ({m_rfnoc_ctrl_tlast, m_rfnoc_ctrl_tdata}),
        .o_tvalid (m_rfnoc_ctrl_tvalid),
        .o_tready (m_rfnoc_ctrl_tready),
        .space    (),
        .occupied ()
      );
    end else begin
      // Different clocks: dual-clock FIFOs, each reset from its input side
      axi_fifo_2clk #(
        .WIDTH    (32+1),
        .SIZE     (SLAVE_FIFO_SIZE),
        .PIPELINE ("NONE")
      ) in_fifo_i (
        .reset    (rfnoc_ctrl_rst),
        .i_aclk   (rfnoc_ctrl_clk),
        .i_tdata  ({s_rfnoc_ctrl_tlast, s_rfnoc_ctrl_tdata}),
        .i_tvalid (s_rfnoc_ctrl_tvalid),
        .i_tready (s_rfnoc_ctrl_tready),
        .o_aclk   (axis_ctrl_clk),
        .o_tdata  ({xing_tlast, xing_tdata}),
        .o_tvalid (xing_tvalid),
        .o_tready (xing_tready)
      );

      axi_fifo_2clk #(
        .WIDTH    (32+1),
        .SIZE     (1),
        .PIPELINE ("NONE")
      ) out_fifo_i (
        .reset    (axis_ctrl_rst),
        .i_aclk   (axis_ctrl_clk),
        .i_tdata  ({s_axis_ctrl_tlast, s_axis_ctrl_tdata}),
        .i_tvalid (s_axis_ctrl_tvalid),
        .i_tready (s_axis_ctrl_tready),
        .o_aclk   (rfnoc_ctrl_clk),
        .o_tdata  ({m_rfnoc_ctrl_tlast, m_rfnoc_ctrl_tdata}),
        .o_tvalid (m_rfnoc_ctrl_tvalid),
        .o_tready (m_rfnoc_ctrl_tready)
      );
    end
  endgenerate

  // One-deep FIFO between the "in" FIFO and the user-facing master port
  axi_fifo #(
    .WIDTH (32+1),
    .SIZE  (1)
  ) slv_pipe_i (
    .clk      (axis_ctrl_clk),
    .reset    (axis_ctrl_rst),
    .clear    (1'b0),
    .i_tdata  ({xing_tlast, xing_tdata}),
    .i_tvalid (xing_tvalid),
    .i_tready (xing_tready),
    .o_tdata  ({m_axis_ctrl_tlast, m_axis_ctrl_tdata}),
    .o_tvalid (m_axis_ctrl_tvalid),
    .o_tready (m_axis_ctrl_tready),
    .space    (),
    .occupied ()
  );

endmodule // axis_ctrl_endpoint

diff --git a/fpga/usrp3/lib/rfnoc/core/axis_ctrl_master.v b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_master.v new file mode 100644 index 000000000..19ae98f52 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_master.v @@ -0,0 +1,316 @@
//
// Copyright 2018-2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_ctrl_master
// Description:
//   This module implements an AXIS-Control master (and a Control-Port
//   slave).
//   Requests are accepted on the slave Control-Port, converted
//   to AXIS-Control requests, then sent over the master AXI-Stream port.
//   Responses are received on the AXI-Stream slave port, and converted
//   to Control-Port responses.
//   NOTE: Transactions are not buffered so there is no need for flow
//         control or throttling.
//
// Parameters:
//   - THIS_PORTID : The local port-ID of this control port
//
// Signals:
//   - s_axis_ctrl_*   : Input control stream (AXI-Stream) for responses
//   - m_axis_ctrl_*   : Output control stream (AXI-Stream) for requests
//   - ctrlport_req_*  : Control-port request port (inputs to this module)
//   - ctrlport_resp_* : Control-port response port (outputs of this module)

module axis_ctrl_master #(
  parameter [9:0] THIS_PORTID = 10'd0
)(
  // Clock and reset
  input  wire         clk,
  input  wire         rst,
  // AXIS-Control Bus (Response)
  input  wire [31:0]  s_axis_ctrl_tdata,
  input  wire         s_axis_ctrl_tlast,
  input  wire         s_axis_ctrl_tvalid,
  output wire         s_axis_ctrl_tready,
  // AXIS-Control Bus (Request)
  output reg  [31:0]  m_axis_ctrl_tdata,
  output wire         m_axis_ctrl_tlast,
  output wire         m_axis_ctrl_tvalid,
  input  wire         m_axis_ctrl_tready,
  // Control Port Endpoint (Request)
  input  wire         ctrlport_req_wr,
  input  wire         ctrlport_req_rd,
  input  wire [19:0]  ctrlport_req_addr,
  input  wire [9:0]   ctrlport_req_portid,
  input  wire [15:0]  ctrlport_req_rem_epid,
  input  wire [9:0]   ctrlport_req_rem_portid,
  input  wire [31:0]  ctrlport_req_data,
  input  wire [3:0]   ctrlport_req_byte_en,
  input  wire         ctrlport_req_has_time,
  input  wire [63:0]  ctrlport_req_time,
  // Control Port Endpoint (Response)
  output wire         ctrlport_resp_ack,
  output wire [1:0]   ctrlport_resp_status,
  output wire [31:0]  ctrlport_resp_data
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_axis_ctrl_utils.vh"

  // ---------------------------------------------------
  //  State Machine
  // ---------------------------------------------------
  localparam [3:0] ST_IDLE          = 4'd0;   // Waiting for a request on slave ctrlport
  localparam [3:0] ST_REQ_HDR_LO    = 4'd1;   // Sending AXIS-Control request header (low bits)
  localparam [3:0] ST_REQ_HDR_HI    = 4'd2;   // Sending AXIS-Control request header (high bits)
  localparam [3:0] ST_REQ_TS_LO     = 4'd3;   // Sending AXIS-Control request timestamp (low bits)
  localparam [3:0] ST_REQ_TS_HI     = 4'd4;   // Sending AXIS-Control request timestamp (high bits)
  localparam [3:0] ST_REQ_OP_WORD   = 4'd5;   // Sending AXIS-Control request operation word
  localparam [3:0] ST_REQ_OP_DATA   = 4'd6;   // Sending AXIS-Control request data word
  localparam [3:0] ST_RESP_HDR_LO   = 4'd7;   // Receiving AXIS-Control response header (low bits)
  localparam [3:0] ST_RESP_HDR_HI   = 4'd8;   // Receiving AXIS-Control response header (high bits)
  localparam [3:0] ST_RESP_TS_LO    = 4'd9;   // Receiving AXIS-Control response timestamp (low bits)
  localparam [3:0] ST_RESP_TS_HI    = 4'd10;  // Receiving AXIS-Control response timestamp (high bits)
  localparam [3:0] ST_RESP_OP_WORD  = 4'd11;  // Receiving AXIS-Control response operation word
  localparam [3:0] ST_RESP_OP_DATA  = 4'd12;  // Receiving AXIS-Control response data word
  localparam [3:0] ST_SHORT_PKT_ERR = 4'd13;  // Response was too short. Send a dummy response on ctrlport
  localparam [3:0] ST_DROP_LONG_PKT = 4'd14;  // Response was too long. Dump the rest of the packet

  // State variables
  reg [3:0]   state = ST_IDLE;            // Current state for FSM
  reg [5:0]   seq_num = 6'd0;             // Expected seqnum for response
  // Request state (captured from the ctrlport when a request is posted)
  reg [3:0]   req_opcode;                 // Cached opcode for transaction request
  reg [19:0]  req_addr;                   // Cached address for transaction request
  reg [9:0]   req_portid;                 // Cached port ID for transaction request
  reg [15:0]  req_rem_epid;               // Cached remote endpoint ID for transaction request
  reg [9:0]   req_rem_portid;             // Cached remote port ID for transaction request
  reg [31:0]  req_data;                   // Cached data word for transaction request
  reg [3:0]   req_byte_en;                // Cached byte enable for transaction request
  reg         req_has_time;               // Cached has_time bit for transaction request
  reg [63:0]  req_time;                   // Cached timestamp for transaction request
  // Response state
  reg         resp_has_time;              // Does the response have a timestamp?
  reg [1:0]   resp_status;                // The status in the response
  reg         resp_seq_err, resp_cmd_err; // Error bits for the response

  always @(posedge clk) begin
    if (rst) begin
      state   <= ST_IDLE;
      seq_num <= 6'd0;
    end else begin
      case (state)

        // Ready to receive a request on ctrlport
        // ------------------------------------
        ST_IDLE: begin
          if (ctrlport_req_wr | ctrlport_req_rd) begin
            // A transaction was posted on the slave ctrlport.
            // Cache the opcode: write only, read only, or both together
            if (!ctrlport_req_rd)
              req_opcode <= AXIS_CTRL_OPCODE_WRITE;
            else if (ctrlport_req_wr)
              req_opcode <= AXIS_CTRL_OPCODE_WRITE_READ;
            else
              req_opcode <= AXIS_CTRL_OPCODE_READ;
            // Cache the rest of the transaction info
            req_addr       <= ctrlport_req_addr;
            req_portid     <= ctrlport_req_portid;
            req_rem_epid   <= ctrlport_req_rem_epid;
            req_rem_portid <= ctrlport_req_rem_portid;
            req_data       <= ctrlport_req_data;
            req_byte_en    <= ctrlport_req_byte_en;
            req_has_time   <= ctrlport_req_has_time;
            req_time       <= ctrlport_req_time;
            // Start sending out the AXIS-Ctrl packet
            state <= ST_REQ_HDR_LO;
          end
        end

        // Send a request AXIS command
        // (a state for each stage in the packet; each advances when the
        //  downstream port accepts the word)
        // ------------------------------------
        ST_REQ_HDR_LO: begin
          if (m_axis_ctrl_tready)
            state <= ST_REQ_HDR_HI;
        end
        ST_REQ_HDR_HI: begin
          // The two timestamp words are only sent for timed requests
          if (m_axis_ctrl_tready)
            state <= req_has_time ? ST_REQ_TS_LO : ST_REQ_OP_WORD;
        end
        ST_REQ_TS_LO: begin
          if (m_axis_ctrl_tready)
            state <= ST_REQ_TS_HI;
        end
        ST_REQ_TS_HI: begin
          if (m_axis_ctrl_tready)
            state <= ST_REQ_OP_WORD;
        end
        ST_REQ_OP_WORD: begin
          if (m_axis_ctrl_tready)
            state <= ST_REQ_OP_DATA;
        end
        ST_REQ_OP_DATA: begin
          if (m_axis_ctrl_tready)
            state <= ST_RESP_HDR_LO;
        end

        // Receive a response AXIS command
        // (a state for each stage in the packet; a tlast before the data
        //  word means the response was too short)
        // ------------------------------------
        ST_RESP_HDR_LO: begin
          if (s_axis_ctrl_tvalid) begin
            // Remember if the packet is supposed to have a timestamp
            resp_has_time <= axis_ctrl_get_has_time(s_axis_ctrl_tdata);
            // Check for a sequence error
            resp_seq_err <= (axis_ctrl_get_seq_num(s_axis_ctrl_tdata) != seq_num);
            // Assert a command error if:
            // - The port ID does not match
            // - The response was too short (overridden just below)
            resp_cmd_err <= (axis_ctrl_get_dst_port(s_axis_ctrl_tdata) != THIS_PORTID);
            if (s_axis_ctrl_tlast) begin
              // Response was too short
              resp_cmd_err <= 1'b1;
              state <= ST_SHORT_PKT_ERR;
            end else begin
              state <= ST_RESP_HDR_HI;
            end
          end
        end
        ST_RESP_HDR_HI: begin
          if (s_axis_ctrl_tvalid) begin
            if (s_axis_ctrl_tlast) begin
              // Response was too short
              resp_cmd_err <= 1'b1;
              state <= ST_SHORT_PKT_ERR;
            end else begin
              state <= resp_has_time ? ST_RESP_TS_LO : ST_RESP_OP_WORD;
            end
          end
        end
        ST_RESP_TS_LO: begin
          if (s_axis_ctrl_tvalid) begin
            if (s_axis_ctrl_tlast) begin
              // Response was too short
              resp_cmd_err <= 1'b1;
              state <= ST_SHORT_PKT_ERR;
            end else begin
              state <= ST_RESP_TS_HI;
            end
          end
        end
        ST_RESP_TS_HI: begin
          if (s_axis_ctrl_tvalid) begin
            if (s_axis_ctrl_tlast) begin
              // Response was too short
              resp_cmd_err <= 1'b1;
              state <= ST_SHORT_PKT_ERR;
            end else begin
              state <= ST_RESP_OP_WORD;
            end
          end
        end
        ST_RESP_OP_WORD: begin
          if (s_axis_ctrl_tvalid) begin
            if (s_axis_ctrl_tlast) begin
              // Response was too short
              resp_cmd_err <= 1'b1;
              state <= ST_SHORT_PKT_ERR;
            end else begin
              // Assert a command error if opcode and addr in request does not match response
              resp_cmd_err <= resp_cmd_err ||
                (axis_ctrl_get_opcode(s_axis_ctrl_tdata) != req_opcode) ||
                (axis_ctrl_get_address(s_axis_ctrl_tdata) != req_addr);
              resp_status <= axis_ctrl_get_status(s_axis_ctrl_tdata);
              state <= ST_RESP_OP_DATA;
            end
          end
        end
        ST_RESP_OP_DATA: begin
          if (s_axis_ctrl_tvalid) begin
            // If the packet was too long then just drop the rest without complaining
            state   <= s_axis_ctrl_tlast ? ST_IDLE : ST_DROP_LONG_PKT;
            seq_num <= seq_num + 6'd1;
          end
        end

        // Error handling states
        // ------------------------------------
        ST_SHORT_PKT_ERR: begin
          // Lasts one cycle, during which the dummy response is acked
          state <= ST_IDLE;
        end
        ST_DROP_LONG_PKT: begin
          if (s_axis_ctrl_tvalid && s_axis_ctrl_tlast)
            state <= ST_IDLE;
        end

        default: begin
          // We should never get here
          state <= ST_IDLE;
        end
      endcase
    end
  end

  // Logic to drive m_axis_ctrl_tdata
  // ------------------------------------
  always @(*) begin
    case (state)
      ST_REQ_HDR_LO: begin
        m_axis_ctrl_tdata = axis_ctrl_build_hdr_lo(
          1'b0        /* is_ack*/,
          req_has_time,
          seq_num,
          4'd1        /* num_data */,
          THIS_PORTID,
          req_portid);
      end
      ST_REQ_HDR_HI: begin
        m_axis_ctrl_tdata = axis_ctrl_build_hdr_hi(
          req_rem_portid,
          req_rem_epid);
      end
      ST_REQ_TS_LO: begin
        m_axis_ctrl_tdata = req_time[31:0];
      end
      ST_REQ_TS_HI: begin
        m_axis_ctrl_tdata = req_time[63:32];
      end
      ST_REQ_OP_WORD: begin
        m_axis_ctrl_tdata = axis_ctrl_build_op_word(
          AXIS_CTRL_STS_OKAY,
          req_opcode,
          req_byte_en,
          req_addr);
      end
      ST_REQ_OP_DATA: begin
        m_axis_ctrl_tdata = req_data;
      end
      default: begin
        m_axis_ctrl_tdata = 32'h0;
      end
    endcase
  end

  // Decode which phase of the transaction the FSM is in
  // ------------------------------------
  reg sending_request;   // In one of the request-transmit states
  reg taking_response;   // In a state that consumes response words

  always @(*) begin
    sending_request = 1'b0;
    taking_response = 1'b0;
    case (state)
      ST_REQ_HDR_LO,
      ST_REQ_HDR_HI,
      ST_REQ_TS_LO,
      ST_REQ_TS_HI,
      ST_REQ_OP_WORD,
      ST_REQ_OP_DATA:   sending_request = 1'b1;
      ST_RESP_HDR_LO,
      ST_RESP_HDR_HI,
      ST_RESP_TS_LO,
      ST_RESP_TS_HI,
      ST_RESP_OP_WORD,
      ST_RESP_OP_DATA,
      ST_DROP_LONG_PKT: taking_response = 1'b1;
      default: ;
    endcase
  end

  // Request stream handshake: the data word is the last word of the packet
  assign m_axis_ctrl_tvalid = sending_request;
  assign m_axis_ctrl_tlast  = (state == ST_REQ_OP_DATA);

  // Logic to backpressure responses
  // ------------------------------------
  assign s_axis_ctrl_tready = taking_response;

  // Logic to drive Control-port response
  // ------------------------------------
  wire short_pkt_resp = (state == ST_SHORT_PKT_ERR);

  // Ack either when the response data word arrives or for a short packet
  assign ctrlport_resp_ack    = short_pkt_resp ||
                                (state == ST_RESP_OP_DATA && s_axis_ctrl_tvalid);
  // A command error takes priority over a sequence error, which takes
  // priority over the status carried in the response
  assign ctrlport_resp_status = resp_cmd_err ? AXIS_CTRL_STS_CMDERR :
                                (resp_seq_err ? AXIS_CTRL_STS_WARNING : resp_status);
  // The response data is the current stream word (zero for a short packet)
  assign ctrlport_resp_data   = short_pkt_resp ? 32'h0 : s_axis_ctrl_tdata;

endmodule // axis_ctrl_master
diff --git a/fpga/usrp3/lib/rfnoc/core/axis_ctrl_slave.v b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_slave.v new file mode 100644 index 000000000..558d21be2 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/axis_ctrl_slave.v @@ -0,0 +1,333 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_slave +// Description: +// This module implements an AXIS-Control slave (and a Control-Port +// master). Requests are accepted on the slave axis port and responses +// are sent out on the master axis port. This module implements the +// following operations: {SLEEP, READ, WRITE}. All other operations +// will be treated as a nop and the output will throw a CMDERR. +// +// Parameters: +// None +// +// Signals: +// - s_axis_ctrl_* : Input control stream (AXI-Stream) for requests +// - m_axis_ctrl_* : Output control stream (AXI-Stream) for responses +// - ctrlport_req_* : Control-port master request port +// - ctrlport_resp_* : Control-port master response port + +module axis_ctrl_slave ( + // CHDR Bus (master and slave) + input wire clk, + input wire rst, + // AXIS-Control Bus (Request) + input wire [31:0] s_axis_ctrl_tdata, + input wire s_axis_ctrl_tlast, + input wire s_axis_ctrl_tvalid, + output wire s_axis_ctrl_tready, + // AXIS-Control Bus (Response) + output wire [31:0] m_axis_ctrl_tdata, + output wire m_axis_ctrl_tlast, + output wire m_axis_ctrl_tvalid, + input wire m_axis_ctrl_tready, + // Control Port Endpoint (Request) + output wire ctrlport_req_wr, + output wire ctrlport_req_rd, + output wire [19:0] ctrlport_req_addr, + output wire [31:0] ctrlport_req_data, + output wire [3:0] ctrlport_req_byte_en, + output wire ctrlport_req_has_time, + output wire [63:0]
ctrlport_req_time, + // Control Port Endpoint (Response) + input wire ctrlport_resp_ack, + input wire [1:0] ctrlport_resp_status, + input wire [31:0] ctrlport_resp_data +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + // --------------------------------------------------- + // Width converters + // --------------------------------------------------- + // Convert 32-bit messages to 64 bits for ease of handling + // and buffering. Convert back to 32 bits. + + wire [63:0] in64_tdata; + wire [1:0] in64_tkeep; + wire in64_tlast, in64_tvalid; + reg in64_tready; + + axis_width_conv #( + .WORD_W(32), .IN_WORDS(1), .OUT_WORDS(2), + .SYNC_CLKS(1), .PIPELINE("OUT") + ) upsizer_i ( + .s_axis_aclk(clk), .s_axis_rst(rst), + .s_axis_tdata(s_axis_ctrl_tdata), .s_axis_tkeep(1'b1), + .s_axis_tlast(s_axis_ctrl_tlast), + .s_axis_tvalid(s_axis_ctrl_tvalid), .s_axis_tready(s_axis_ctrl_tready), + .m_axis_aclk(clk), .m_axis_rst(rst), + .m_axis_tdata(in64_tdata), .m_axis_tkeep(in64_tkeep), + .m_axis_tlast(in64_tlast), + .m_axis_tvalid(in64_tvalid), .m_axis_tready(in64_tready) + ); + + reg [63:0] out64_tdata; + wire [1:0] out64_tkeep; + reg out64_tvalid; + wire out64_tlast, out64_terror, out64_tready; + + wire [63:0] out64_gt_tdata; + wire [1:0] out64_gt_tkeep; + wire out64_gt_tlast, out64_gt_tvalid, out64_gt_tready; + + // The header of the response packet is generated + // immediately when a request is received but the data + // comes much later. The packet gate will smooth out the + // outgoing responses. 
// Response packet gate: buffers a whole response and discards it when
// i_terror is asserted (driven by out64_terror below).
axi_packet_gate #(
  .WIDTH(66), .SIZE(4)
) gate_i (
  .clk(clk), .reset(rst), .clear(1'b0),
  .i_tdata({out64_tkeep, out64_tdata}), .i_tlast(out64_tlast),
  .i_terror(out64_terror),
  .i_tvalid(out64_tvalid), .i_tready(out64_tready),
  .o_tdata({out64_gt_tkeep, out64_gt_tdata}), .o_tlast(out64_gt_tlast),
  .o_tvalid(out64_gt_tvalid), .o_tready(out64_gt_tready)
);

// Convert the gated 64-bit response stream back to the 32-bit AXIS-Ctrl bus
axis_width_conv #(
  .WORD_W(32), .IN_WORDS(2), .OUT_WORDS(1),
  .SYNC_CLKS(1), .PIPELINE("IN")
) downsizer_i (
  .s_axis_aclk(clk), .s_axis_rst(rst),
  .s_axis_tdata(out64_gt_tdata), .s_axis_tkeep(out64_gt_tkeep),
  .s_axis_tlast(out64_gt_tlast),
  .s_axis_tvalid(out64_gt_tvalid), .s_axis_tready(out64_gt_tready),
  .m_axis_aclk(clk), .m_axis_rst(rst),
  .m_axis_tdata(m_axis_ctrl_tdata), .m_axis_tkeep(/*unused*/),
  .m_axis_tlast(m_axis_ctrl_tlast),
  .m_axis_tvalid(m_axis_ctrl_tvalid), .m_axis_tready(m_axis_ctrl_tready)
);

// ---------------------------------------------------
// Transaction Processor
// ---------------------------------------------------

localparam [2:0] ST_IN_HDR       = 3'd0;  // Transferring input header to output
localparam [2:0] ST_IN_TS        = 3'd1;  // Transferring input timestamp to output
localparam [2:0] ST_IN_OP_WORD   = 3'd2;  // Processing input control word
localparam [2:0] ST_WAIT_FOR_ACK = 3'd3;  // Waiting for a ctrlport response
localparam [2:0] ST_SLEEP        = 3'd4;  // Idle state for sleep operation
localparam [2:0] ST_OUT_OP_WORD  = 3'd5;  // Outputting control word after response receipt
localparam [2:0] ST_MORE_DATA    = 3'd6;  // Control word is too long. Passing extra data forward
localparam [2:0] ST_DROP         = 3'd7;  // Something went wrong. Drop the current packet

// State variables
reg [2:0]  state = ST_IN_HDR;       // Current state of FSM
reg [31:0] sleep_cntr = 32'd0;      // Counter to count sleep cycles
reg        cached_has_time = 1'b0;  // Cached "has_time" bit for input transaction request
reg [63:0] cached_time;             // Cached timestamp for input transaction request
reg [1:0]  resp_status;             // Status for outgoing response
reg [31:0] resp_data;               // Data for outgoing response

// Sleep is an internal operation
wire ctrlport_req_sleep;

// Shortcuts (transaction request header)
// The low 32 bits of the 64-bit word hold the first header word and the
// high 32 bits hold the second one.
wire        is_ack       = axis_ctrl_get_is_ack      (in64_tdata[31:0] );
wire        has_time     = axis_ctrl_get_has_time    (in64_tdata[31:0] );
wire [5:0]  seq_num      = axis_ctrl_get_seq_num     (in64_tdata[31:0] );
wire [3:0]  num_data     = axis_ctrl_get_num_data    (in64_tdata[31:0] );
wire [9:0]  src_port     = axis_ctrl_get_src_port    (in64_tdata[31:0] );
wire [9:0]  dst_port     = axis_ctrl_get_dst_port    (in64_tdata[31:0] );
wire [9:0]  rem_dst_port = axis_ctrl_get_rem_dst_port(in64_tdata[63:32]);
wire [15:0] rem_dst_epid = axis_ctrl_get_rem_dst_epid(in64_tdata[63:32]);
// A request must not be an ACK and must carry at least one data word
wire        malformed    = (is_ack || num_data == 4'd0);
// Shortcuts (transaction request op-word)
wire [19:0] xact_address = axis_ctrl_get_address(in64_tdata[31:0]);
wire [3:0]  xact_byte_en = axis_ctrl_get_byte_en(in64_tdata[31:0]);
wire [3:0]  xact_opcode  = axis_ctrl_get_opcode (in64_tdata[31:0]);
wire [31:0] xact_data    = in64_tdata[63:32];

always @(posedge clk) begin
  if (rst) begin
    state <= ST_IN_HDR;
  end else begin
    case (state)

      // Receive an AXIS-Control request
      // (a state for each stage in the packet)
      // Except for the OP_WORD stage, the appropriate response
      // line is also pushed to the output
      // ------------------------------------
      ST_IN_HDR: begin
        if (in64_tvalid && in64_tready) begin
          cached_has_time <= has_time;
          if (!in64_tlast) begin
            if (malformed)      // Malformed packet. Drop.
              state <= ST_DROP;
            else if (has_time)  // Pkt has a timestamp
              state <= ST_IN_TS;
            else                // Pkt has no timestamp
              state <= ST_IN_OP_WORD;
          end else begin
            // Premature termination
            // out64_terror will be asserted to cancel the outgoing response
            state <= ST_IN_HDR;
          end
        end
      end
      ST_IN_TS: begin
        if (in64_tvalid && in64_tready) begin
          cached_time <= in64_tdata;
          if (!in64_tlast) begin
            state <= ST_IN_OP_WORD;
          end else begin
            // Premature termination
            // out64_terror will be asserted to cancel the outgoing response
            state <= ST_IN_HDR;
          end
        end
      end
      ST_IN_OP_WORD: begin
        // The op-word is inspected but not consumed here (in64_tready is low
        // in this state); it is consumed later in ST_OUT_OP_WORD.
        if (in64_tvalid) begin
          if (ctrlport_req_sleep) begin
            state <= ST_SLEEP;
            sleep_cntr <= xact_data;
          end else if (ctrlport_req_rd | ctrlport_req_wr) begin
            state <= ST_WAIT_FOR_ACK;
          end else begin
            // Treat all other operations as a NOP (1 cycle sleep)
            state <= ST_SLEEP;
            sleep_cntr <= 32'd0;
            resp_status <= AXIS_CTRL_STS_CMDERR;
          end
        end
      end

      // Hold the input bus to implement a sleep
      // ------------------------------------
      ST_SLEEP: begin
        if (sleep_cntr == 32'd0) begin
          state <= ST_OUT_OP_WORD;
          resp_data <= xact_data;
          // We could get to this state for an invalid opcode so
          // only update the status if this is a legit sleep op
          if (xact_opcode == AXIS_CTRL_OPCODE_SLEEP)
            resp_status <= AXIS_CTRL_STS_OKAY;
        end else begin
          sleep_cntr <= sleep_cntr - 32'd1;
        end
      end

      // Wait for a response on the master ctrlport
      // ------------------------------------
      ST_WAIT_FOR_ACK: begin
        if (ctrlport_resp_ack) begin
          resp_status <= ctrlport_resp_status;
          // Reads return the ctrlport data; writes echo the request data
          if (xact_opcode == AXIS_CTRL_OPCODE_READ ||
              xact_opcode == AXIS_CTRL_OPCODE_WRITE_READ)
            resp_data <= ctrlport_resp_data;
          else
            resp_data <= xact_data;
          state <= ST_OUT_OP_WORD;
        end
      end

      // Send the AXIS-Control response data
      // ------------------------------------
      ST_OUT_OP_WORD: begin
        if (in64_tvalid && in64_tready) begin
          state <= in64_tlast ? ST_IN_HDR : ST_MORE_DATA;
        end
      end

      // Framing error handlers
      // ------------------------------------
      ST_MORE_DATA: begin
        if (in64_tvalid && in64_tready && in64_tlast)
          state <= ST_IN_HDR;
      end
      ST_DROP: begin
        if (in64_tvalid && in64_tready && in64_tlast)
          state <= ST_IN_HDR;
      end

      default: begin
        // We should never get here
        state <= ST_IN_HDR;
      end
    endcase
  end
end

// Output mux: selects what is pushed into the packet gate and when the
// input stream is allowed to advance, based on the current state.
always @(*) begin
  case (state)
    ST_IN_HDR: begin  // Swap src/dst and add resp flag when passing header
      in64_tready = out64_tready;
      out64_tdata = {
        axis_ctrl_build_hdr_hi(rem_dst_port, rem_dst_epid),
        axis_ctrl_build_hdr_lo(1'b1, has_time, seq_num, num_data, dst_port, src_port)
      };
      out64_tvalid = in64_tvalid && !malformed;
    end
    ST_IN_TS: begin  // Pass input to the output without modification
      in64_tready = out64_tready;
      out64_tdata = in64_tdata;
      out64_tvalid = in64_tvalid;
    end
    ST_OUT_OP_WORD: begin  // Update status and data when passing op-word
      in64_tready = out64_tready;
      out64_tdata = {
        resp_data,
        axis_ctrl_build_op_word(resp_status, xact_opcode, xact_byte_en, xact_address)
      };
      out64_tvalid = in64_tvalid;
    end
    ST_MORE_DATA: begin  // Pass input to the output without modification
      in64_tready = out64_tready;
      out64_tdata = in64_tdata;
      out64_tvalid = in64_tvalid;
    end
    ST_DROP: begin  // Consume input but don't produce output
      in64_tready = 1'b1;
      out64_tdata = 64'h0;
      out64_tvalid = 1'b0;
    end
    // Covers ST_IN_OP_WORD, ST_WAIT_FOR_ACK and ST_SLEEP
    default: begin  // State machine is waiting. Don't produce output
      in64_tready = 1'b0;
      out64_tdata = 64'h0;
      out64_tvalid = 1'b0;
    end
  endcase
end

assign out64_tlast  = in64_tlast;
assign out64_tkeep  = in64_tkeep;
assign out64_terror = (state == ST_IN_HDR || state == ST_IN_TS) && in64_tlast; //Premature termination

// Control-port request signals
// The request strobes are only asserted while the FSM is in ST_IN_OP_WORD
// with a valid op-word on the input.
assign ctrlport_req_sleep    = in64_tvalid && (state == ST_IN_OP_WORD) &&
                               (xact_opcode == AXIS_CTRL_OPCODE_SLEEP);
assign ctrlport_req_wr       = in64_tvalid && (state == ST_IN_OP_WORD) &&
                               (xact_opcode == AXIS_CTRL_OPCODE_WRITE ||
                                xact_opcode == AXIS_CTRL_OPCODE_WRITE_READ);
assign ctrlport_req_rd       = in64_tvalid && (state == ST_IN_OP_WORD) &&
                               (xact_opcode == AXIS_CTRL_OPCODE_READ ||
                                xact_opcode == AXIS_CTRL_OPCODE_WRITE_READ);
assign ctrlport_req_addr     = xact_address;
assign ctrlport_req_byte_en  = xact_byte_en;
assign ctrlport_req_data     = xact_data;
assign ctrlport_req_has_time = cached_has_time;
assign ctrlport_req_time     = cached_time;

endmodule // axis_ctrl_slave
diff --git a/fpga/usrp3/lib/rfnoc/core/axis_data_mdata_to_chdr.v b/fpga/usrp3/lib/rfnoc/core/axis_data_mdata_to_chdr.v
new file mode 100644
index 000000000..dbeb35d08
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/core/axis_data_mdata_to_chdr.v
@@ -0,0 +1,603 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_data_mdata_to_chdr
//
// Description:
//
//   A framer module for CHDR data packets. It accepts an input data stream
//   (with sideband information for packet flags and timestamp) and a separate
//   metadata stream. A data packet and a metadata packet are required to be
//   input in order for a single CHDR packet to be generated. If no metadata is
//   associated with the payload, then an empty metadata packet must be input
//   along with the data packet (i.e., input a metadata packet with
//   s_axis_mdata_tkeep set to 0).
+// +// The sideband information (e.g., timestamp, flags) must be input coincident +// with the AXI-Stream data input and will be sampled coincident with the +// last word of data in the packet (i.e., when tlast is asserted). +// +// This module also performs an optional clock crossing and data width +// conversion from a user-requested width for the payload bus to CHDR_W. +// +// In order to guarantee a gapless CHDR data stream, the metadata packet +// should be input before the end of the data packet, although this is not +// required. +// +// Parameters: +// +// CHDR_W : Width of the output CHDR bus in bits +// ITEM_W : Width of the input item bus in bits +// NIPC : The number of input items accepted per cycle +// SYNC_CLKS : Are the CHDR and data clocks synchronous to each other? +// MTU : Log2 of the maximum packet size in CHDR words +// INFO_FIFO_SIZE : Log2 of the info FIFO size. This determines the number of +// packets that can be simultaneously buffered in the +// payload FIFO. +// PYLD_FIFO_SIZE : Log2 of the payload FIFO size. The actual FIFO size will +// be the maximum of 2**MTU and 2**PYLD_FIFO_SIZE, since the +// FIFO must be at least one MTU so that we can calculate +// the packet length in the header. 
//
// Signals:
//
//   m_axis_chdr_*  : Output CHDR stream
//   s_axis_*       : Input data stream (AXI-Stream)
//   s_axis_mdata_* : Input metadata stream (AXI-Stream)
//   flush_*        : Signals for flush control and status
//

module axis_data_mdata_to_chdr #(
  parameter CHDR_W         = 256,
  parameter ITEM_W         = 32,
  parameter NIPC           = 2,
  parameter SYNC_CLKS      = 0,
  parameter MTU            = 10,
  parameter INFO_FIFO_SIZE = 4,
  parameter PYLD_FIFO_SIZE = MTU
)(
  // Clock, reset and settings
  input  wire                     axis_chdr_clk,
  input  wire                     axis_chdr_rst,
  input  wire                     axis_data_clk,
  input  wire                     axis_data_rst,
  // CHDR out (AXI-Stream)
  output wire [CHDR_W-1:0]        m_axis_chdr_tdata,
  output wire                     m_axis_chdr_tlast,
  output wire                     m_axis_chdr_tvalid,
  input  wire                     m_axis_chdr_tready,
  // Payload data stream in (AXI-Stream)
  input  wire [(ITEM_W*NIPC)-1:0] s_axis_tdata,
  input  wire [NIPC-1:0]          s_axis_tkeep,
  input  wire                     s_axis_tlast,
  input  wire                     s_axis_tvalid,
  output wire                     s_axis_tready,
  // Payload sideband info
  input  wire [63:0]              s_axis_ttimestamp,
  input  wire                     s_axis_thas_time,
  input  wire                     s_axis_teov,
  input  wire                     s_axis_teob,
  // Metadata stream in (AXI-Stream)
  input  wire [CHDR_W-1:0]        s_axis_mdata_tdata,
  input  wire                     s_axis_mdata_tlast,
  input  wire                     s_axis_mdata_tkeep,
  input  wire                     s_axis_mdata_tvalid,
  output wire                     s_axis_mdata_tready,
  // Flush signals
  input  wire                     flush_en,
  input  wire [31:0]              flush_timeout,
  output wire                     flush_active,
  output wire                     flush_done
);

// Make sure the metadata FIFO is large enough to store an entire packet's
// worth of metadata (32 words).
localparam MDATA_FIFO_SIZE = 5;

// Make sure the payload FIFO is large enough to store an entire packet's
// worth of payload data. This will ensure that we can buffer the entire
// packet to calculate its length.
localparam PAYLOAD_FIFO_SIZE = PYLD_FIFO_SIZE > MTU ?
                               PYLD_FIFO_SIZE : MTU;


// ---------------------------------------------------
// RFNoC Includes
// ---------------------------------------------------

`include "rfnoc_chdr_utils.vh"
`include "rfnoc_axis_ctrl_utils.vh"


//---------------------------------------------------------------------------
// Timestamp and Flags Capture
//---------------------------------------------------------------------------
//
// The timestamp and flags that we use for each packet is that of the last
// data word. Here, we capture this information at the end of the packet.
//
//---------------------------------------------------------------------------

reg [63:0] packet_timestamp;
reg        packet_has_time;
reg        packet_eov;
reg        packet_eob;

// Sample the sideband signals only when the last payload word of a packet
// is accepted. These registers have no reset.
always @(posedge axis_data_clk) begin
  if (s_axis_tvalid & s_axis_tready & s_axis_tlast) begin
    packet_timestamp <= s_axis_ttimestamp;
    packet_has_time  <= s_axis_thas_time;
    packet_eov       <= s_axis_teov;
    packet_eob       <= s_axis_teob;
  end
end


//---------------------------------------------------------------------------
// Length Counters
//---------------------------------------------------------------------------
//
// Here we track the state of the incoming packet to determine the payload
// and mdata length.
//
//---------------------------------------------------------------------------

localparam HDR_LEN = CHDR_W/8;  // Length of CHDR header word in bytes

reg [15:0] packet_length;               // Length captured at the end of a packet
reg [15:0] length_count = HDR_LEN;      // Running byte count (starts at the header size)
reg        in_pkt_info_tvalid = 0;      // One-cycle write strobe for the packet info FIFO
wire       in_pkt_info_tready;

always @(posedge axis_data_clk) begin : pkt_length_counter
  if (axis_data_rst) begin
    length_count       <= HDR_LEN;
    in_pkt_info_tvalid <= 1'b0;
  end else begin : pkt_length_counter_main
    // Calculate the length of this word in bytes, taking tkeep into account
    integer i;
    integer num_bytes;
    num_bytes = 0;
    for (i = 0; i < NIPC; i = i + 1) begin
      num_bytes = num_bytes + (s_axis_tkeep[i]*(ITEM_W/8));
    end

    // Update the packet length if the word is accepted
    // (in_pkt_info_tvalid defaults low, so it pulses for a single cycle
    // after the last word of each packet)
    in_pkt_info_tvalid <= 1'b0;
    if (s_axis_tvalid && s_axis_tready) begin
      if (s_axis_tlast) begin
        length_count       <= HDR_LEN;
        packet_length      <= length_count + num_bytes;
        in_pkt_info_tvalid <= 1'b1;
      end else begin
        length_count <= length_count + num_bytes;
      end
    end
  end
end


reg [4:0] num_mdata = 0;              // Metadata word count captured at the end of a packet
reg [4:0] mdata_count = 0;            // Running count of kept metadata words
reg       in_mdata_info_tvalid = 0;   // One-cycle write strobe for the mdata info FIFO
wire      in_mdata_info_tready;

always @(posedge axis_data_clk) begin : num_mdata_counter
  if (axis_data_rst) begin
    mdata_count          <= 0;
    num_mdata            <= 0;
    in_mdata_info_tvalid <= 1'b0;
  end else begin : num_mdata_counter_main
    // Update the mdata length if the word is accepted
    // (a word with s_axis_mdata_tkeep low is not counted)
    in_mdata_info_tvalid <= 1'b0;
    if (s_axis_mdata_tvalid && s_axis_mdata_tready) begin
      if (s_axis_mdata_tlast) begin
        mdata_count          <= 0;
        num_mdata            <= mdata_count + s_axis_mdata_tkeep;
        in_mdata_info_tvalid <= 1'b1;
      end else begin
        mdata_count <= mdata_count + s_axis_mdata_tkeep;
      end
    end
  end
end


//---------------------------------------------------------------------------
// Data Width Converter (ITEM_W*NIPC => CHDR_W)
//---------------------------------------------------------------------------

wire [CHDR_W-1:0] in_pyld_tdata;
wire              in_pyld_tlast;
wire              in_pyld_tvalid;
wire              in_pyld_tready;
wire              width_conv_tready;

// Only accept payload when both the payload FIFO and the packet info FIFO
// can take data.
assign width_conv_tready = in_pyld_tready & in_pkt_info_tready;

generate
  if (NIPC != CHDR_W/ITEM_W) begin : gen_axis_width_conv
    // Input word width differs from CHDR_W, so resize. tkeep is forced to
    // all ones here; the byte count is tracked separately by
    // pkt_length_counter using s_axis_tkeep.
    axis_width_conv #(
      .WORD_W    (ITEM_W),
      .IN_WORDS  (NIPC),
      .OUT_WORDS (CHDR_W/ITEM_W),
      .SYNC_CLKS (1),
      .PIPELINE  ("IN")
    ) payload_width_conv_i (
      .s_axis_aclk   (axis_data_clk),
      .s_axis_rst    (axis_data_rst),
      .s_axis_tdata  (s_axis_tdata),
      .s_axis_tkeep  ({NIPC{1'b1}}),
      .s_axis_tlast  (s_axis_tlast),
      .s_axis_tvalid (s_axis_tvalid),
      .s_axis_tready (s_axis_tready),
      .m_axis_aclk   (axis_data_clk),
      .m_axis_rst    (axis_data_rst),
      .m_axis_tdata  (in_pyld_tdata),
      .m_axis_tkeep  (),
      .m_axis_tlast  (in_pyld_tlast),
      .m_axis_tvalid (in_pyld_tvalid),
      .m_axis_tready (width_conv_tready)
    );
  end else begin : no_gen_axis_width_conv
    // Widths already match: pass the input stream straight through
    assign in_pyld_tdata  = s_axis_tdata;
    assign in_pyld_tlast  = s_axis_tlast;
    assign in_pyld_tvalid = s_axis_tvalid;
    assign s_axis_tready  = width_conv_tready;
  end
endgenerate


//---------------------------------------------------------------------------
// Input FIFOs
//---------------------------------------------------------------------------
//
// Buffer the data, packet info, metadata, and cross it into the CHDR clock
// domain, if needed. The payload FIFO is sized to match the MTU so that an
// entire packet can be buffered while the length is calculated.
//
//---------------------------------------------------------------------------

// FIFO read-side signals. The tready signals are regs because they are
// driven from a combinational always block in the output state machine.
wire [CHDR_W-1:0] out_mdata_tdata, out_pyld_tdata;
wire              out_mdata_tlast, out_pyld_tlast;
wire              out_mdata_tvalid, out_pyld_tvalid;
reg               out_mdata_tready, out_pyld_tready;

wire        out_pkt_info_tvalid;
reg         out_pkt_info_tready;
wire        out_eob, out_eov, out_has_time;
wire [63:0] out_timestamp;
wire [15:0] out_length;

wire [4:0] out_num_mdata;
reg        out_mdata_info_tready;
wire       out_mdata_info_tvalid;

wire in_mdata_tready;


// Only accept metadata when both the mdata FIFO and the mdata info FIFO
// can take data.
assign s_axis_mdata_tready = in_mdata_tready & in_mdata_info_tready;

generate if (SYNC_CLKS) begin : gen_sync_fifo
  // Single-clock FIFOs: all four are clocked by axis_chdr_clk and reset by
  // axis_chdr_rst.
  axi_fifo #(
    .WIDTH (CHDR_W+1),
    .SIZE  (PAYLOAD_FIFO_SIZE)
  ) pyld_fifo (
    .clk      (axis_chdr_clk),
    .reset    (axis_chdr_rst),
    .clear    (1'b0),
    .i_tdata  ({in_pyld_tlast, in_pyld_tdata}),
    .i_tvalid (in_pyld_tvalid),
    .i_tready (in_pyld_tready),
    .o_tdata  ({out_pyld_tlast, out_pyld_tdata}),
    .o_tvalid (out_pyld_tvalid),
    .o_tready (out_pyld_tready),
    .space    (),
    .occupied ()
  );
  axi_fifo #(
    .WIDTH (CHDR_W + 1),
    .SIZE  (MDATA_FIFO_SIZE)
  ) mdata_fifo (
    .clk      (axis_chdr_clk),
    .reset    (axis_chdr_rst),
    .clear    (1'b0),
    .i_tdata  ({s_axis_mdata_tlast, s_axis_mdata_tdata}),
    .i_tvalid (s_axis_mdata_tvalid),
    .i_tready (in_mdata_tready),
    .o_tdata  ({out_mdata_tlast, out_mdata_tdata}),
    .o_tvalid (out_mdata_tvalid),
    .o_tready (out_mdata_tready),
    .space    (),
    .occupied ()
  );
  // Per-packet info word: {eob, eov, has_time, timestamp[63:0], length[15:0]}
  axi_fifo #(
    .WIDTH (3 + 64 + 16),
    .SIZE  (INFO_FIFO_SIZE)
  ) pkt_info_fifo (
    .clk      (axis_chdr_clk),
    .reset    (axis_chdr_rst),
    .clear    (1'b0),
    .i_tdata  ({packet_eob, packet_eov, packet_has_time,packet_timestamp, packet_length}),
    .i_tvalid (in_pkt_info_tvalid),
    .i_tready (in_pkt_info_tready),
    .o_tdata  ({out_eob, out_eov, out_has_time, out_timestamp, out_length}),
    .o_tvalid (out_pkt_info_tvalid),
    .o_tready (out_pkt_info_tready),
    .space    (),
    .occupied ()
  );
  // Per-packet metadata word count
  axi_fifo #(
    .WIDTH (5),
    .SIZE  (INFO_FIFO_SIZE)
  ) mdata_info_fifo (
    .clk      (axis_chdr_clk),
    .reset    (axis_chdr_rst),
    .clear    (1'b0),
    .i_tdata  (num_mdata),
    .i_tvalid (in_mdata_info_tvalid),
    .i_tready (in_mdata_info_tready),
    .o_tdata  (out_num_mdata),
    .o_tvalid (out_mdata_info_tvalid),
    .o_tready (out_mdata_info_tready),
    .space    (),
    .occupied ()
  );

end else begin : gen_async_fifo
  // Dual-clock FIFOs: written on axis_data_clk, read on axis_chdr_clk, and
  // reset by axis_data_rst.
  axi_fifo_2clk #(
    .WIDTH (CHDR_W + 1),
    .SIZE  (PAYLOAD_FIFO_SIZE)
  ) pyld_fifo (
    .reset    (axis_data_rst),
    .i_aclk   (axis_data_clk),
    .i_tdata  ({in_pyld_tlast, in_pyld_tdata}),
    .i_tvalid (in_pyld_tvalid),
    .i_tready (in_pyld_tready),
    .o_aclk   (axis_chdr_clk),
    .o_tdata  ({out_pyld_tlast, out_pyld_tdata}),
    .o_tvalid (out_pyld_tvalid),
    .o_tready (out_pyld_tready)
  );
  axi_fifo_2clk #(
    .WIDTH (CHDR_W + 1),
    .SIZE  (MDATA_FIFO_SIZE)
  ) mdata_fifo (
    .reset    (axis_data_rst),
    .i_aclk   (axis_data_clk),
    .i_tdata  ({s_axis_mdata_tlast, s_axis_mdata_tdata}),
    .i_tvalid (s_axis_mdata_tvalid),
    .i_tready (in_mdata_tready),
    .o_aclk   (axis_chdr_clk),
    .o_tdata  ({out_mdata_tlast, out_mdata_tdata}),
    .o_tvalid (out_mdata_tvalid),
    .o_tready (out_mdata_tready)
  );
  // Per-packet info word: {eob, eov, has_time, timestamp[63:0], length[15:0]}
  axi_fifo_2clk #(
    .WIDTH (3 + 64 + 16),
    .SIZE  (INFO_FIFO_SIZE)
  ) pkt_info_fifo (
    .reset    (axis_data_rst),
    .i_aclk   (axis_data_clk),
    .i_tdata  ({packet_eob, packet_eov, packet_has_time,packet_timestamp, packet_length}),
    .i_tvalid (in_pkt_info_tvalid),
    .i_tready (in_pkt_info_tready),
    .o_aclk   (axis_chdr_clk),
    .o_tdata  ({out_eob, out_eov, out_has_time, out_timestamp, out_length}),
    .o_tvalid (out_pkt_info_tvalid),
    .o_tready (out_pkt_info_tready)
  );
  // Per-packet metadata word count
  axi_fifo_2clk #(
    .WIDTH (5),
    .SIZE  (INFO_FIFO_SIZE)
  ) mdata_info_fifo (
    .reset    (axis_data_rst),
    .i_aclk   (axis_data_clk),
    .i_tdata  (num_mdata),
    .i_tvalid (in_mdata_info_tvalid),
    .i_tready (in_mdata_info_tready),
    .o_aclk   (axis_chdr_clk),
    .o_tdata  (out_num_mdata),
    .o_tvalid (out_mdata_info_tvalid),
    .o_tready (out_mdata_info_tready)
  );
end endgenerate


//---------------------------------------------------------------------------
// Output State Machine
//---------------------------------------------------------------------------
//
// Assembles the outgoing CHDR packet: header (plus timestamp), then the
// metadata words, then the payload words. A packet is only started once
// both the packet info word and the mdata info word are available.
//
//---------------------------------------------------------------------------

reg [CHDR_W-1:0] chdr_pf_tdata;
reg              chdr_pf_tlast, chdr_pf_tvalid;
wire             chdr_pf_tready;

localparam [1:0] ST_HDR   = 0;  // Processing the output CHDR header
localparam [1:0] ST_TS    = 1;  // Processing the output CHDR timestamp
localparam [1:0] ST_MDATA = 2;  // Processing the output CHDR metadata word
localparam [1:0] ST_PYLD  = 3;  // Processing the output CHDR payload word

reg [1:0] state = ST_HDR;

reg [15:0] seq_num = 0;

wire [63:0] header;
reg  [63:0] timestamp;
wire [15:0] length;
reg         has_mdata;

// Sum the payload, metadata, and timestamp lengths (out_length already
// includes the header).
assign length = (CHDR_W > 64) ?
    out_length + out_num_mdata * (CHDR_W/8) :
    out_length + out_num_mdata * (CHDR_W/8) + 8*out_has_time;

// Build the header word
assign header = chdr_build_header(
  6'b0,                        // vc
  out_eob,                     // eob
  out_eov,                     // eov
  out_has_time ? CHDR_PKT_TYPE_DATA_TS :
                 CHDR_PKT_TYPE_DATA,  // pkt_type
  out_num_mdata,               // num_mdata
  seq_num,                     // seq_num
  length,                      // length
  16'b0                        // dst_epid
);

// The header word is transferred only when BOTH info FIFOs present valid
// data and the downstream flusher accepts it. The same term gates the FIFO
// pops below so that no info word is consumed unless the header is sent.
wire hdr_word_accepted = out_pkt_info_tvalid & out_mdata_info_tvalid &
                         chdr_pf_tready;

always @(posedge axis_chdr_clk) begin
  if (axis_chdr_rst) begin
    state   <= ST_HDR;
    seq_num <= 0;
  end else begin
    case (state)

      // ST_HDR: CHDR Header
      // -------------------
      ST_HDR: begin
        // Keep a copy of the info that is needed after the info FIFOs have
        // been popped.
        timestamp <= out_timestamp;
        has_mdata <= (out_num_mdata != CHDR_NO_MDATA);

        if (hdr_word_accepted) begin
          if (CHDR_W > 64) begin
            // When CHDR_W > 64, the timestamp is a part of the header word.
            // If this is a data packet (with or without a TS), we skip the
            // timestamp state and move directly to metadata/body.
            if (out_num_mdata == CHDR_NO_MDATA) begin
              state <= ST_PYLD;
            end else begin
              state <= ST_MDATA;
            end
          end else begin
            // When CHDR_W == 64, the timestamp comes after the header. Check
            // if this is a data packet with a timestamp or metadata to
            // figure out the next state.
            if (out_has_time) begin
              state <= ST_TS;
            end else if (out_num_mdata != CHDR_NO_MDATA) begin
              state <= ST_MDATA;
            end else begin
              state <= ST_PYLD;
            end
          end
        end
      end

      // ST_TS: Timestamp (CHDR_W == 64 only)
      // ------------------------------------
      ST_TS: begin
        if (chdr_pf_tready) begin
          state <= has_mdata ? ST_MDATA : ST_PYLD;
        end
      end

      // ST_MDATA: Metadata word
      // -----------------------
      ST_MDATA: begin
        if (out_mdata_tvalid && out_mdata_tready && out_mdata_tlast) begin
          state <= ST_PYLD;
        end
      end

      // ST_PYLD: Payload word
      // ---------------------
      ST_PYLD: begin
        if (out_pyld_tvalid && out_pyld_tready && out_pyld_tlast) begin
          state   <= ST_HDR;
          seq_num <= seq_num + 1;
        end
      end

      default: begin
        // We should never get here
        state <= ST_HDR;
      end
    endcase
  end
end

always @(*) begin
  case (state)
    ST_HDR: begin
      // Insert header word
      chdr_pf_tdata         = (CHDR_W > 64) ? { out_timestamp, header } : header;
      chdr_pf_tvalid        = out_pkt_info_tvalid & out_mdata_info_tvalid;
      chdr_pf_tlast         = 1'b0;
      // Pop the FIFOs only on an actual header transfer. Driving these
      // readies from chdr_pf_tready alone would pop whichever info FIFO
      // becomes valid first while the other one is still empty, losing
      // that info word.
      out_mdata_tready      = hdr_word_accepted &   // Remove empty mdata packet from FIFO
                              (out_num_mdata == CHDR_NO_MDATA);
      out_mdata_info_tready = hdr_word_accepted;    // Remove mdata info word from FIFO
      out_pyld_tready       = 1'b0;
      out_pkt_info_tready   = hdr_word_accepted;    // Remove packet info word from FIFO
    end
    ST_TS: begin
      // Insert timestamp. Assign the full bus (zero-extended) so that no
      // latch is inferred for the upper bits when CHDR_W > 64.
      chdr_pf_tdata         = timestamp;
      chdr_pf_tvalid        = 1'b1;  // Timestamp register is always valid in this state
      chdr_pf_tlast         = 1'b0;
      out_mdata_tready      = 1'b0;
      out_mdata_info_tready = 1'b0;
      out_pyld_tready       = 1'b0;
      out_pkt_info_tready   = 1'b0;
    end
    ST_MDATA: begin
      // Insert mdata words
      chdr_pf_tdata         = out_mdata_tdata;
      chdr_pf_tvalid        = out_mdata_tvalid;
      chdr_pf_tlast         = 1'b0;
      out_mdata_tready      = chdr_pf_tready;
      out_mdata_info_tready = 1'b0;
      out_pyld_tready       = 1'b0;
      out_pkt_info_tready   = 1'b0;
    end
    ST_PYLD: begin
      // Insert payload words
      chdr_pf_tdata         = out_pyld_tdata;
      chdr_pf_tvalid        = out_pyld_tvalid;
      chdr_pf_tlast         = out_pyld_tlast;
      out_mdata_tready      = 1'b0;
      out_mdata_info_tready = 1'b0;
      out_pyld_tready       = chdr_pf_tready;
      out_pkt_info_tready   = 1'b0;
    end
    default: begin
      chdr_pf_tdata         = out_pyld_tdata;
      chdr_pf_tvalid        = 1'b0;
      chdr_pf_tlast         = 1'b0;
      out_mdata_tready      = 1'b0;
      out_mdata_info_tready = 1'b0;
      out_pyld_tready       = 1'b0;
      out_pkt_info_tready   = 1'b0;
    end
  endcase
end


//---------------------------------------------------------------------------
// Flushing Logic
//---------------------------------------------------------------------------

axis_packet_flush #(
  .WIDTH              (CHDR_W),
  .FLUSH_PARTIAL_PKTS (0),
  .TIMEOUT_W          (32),
  .PIPELINE           ("IN")
) chdr_flusher_i (
  .clk           (axis_chdr_clk),
  .reset         (axis_chdr_rst),
  .enable        (flush_en),
  .timeout       (flush_timeout),
  .flushing      (flush_active),
  .done          (flush_done),
  .s_axis_tdata  (chdr_pf_tdata),
  .s_axis_tlast  (chdr_pf_tlast),
  .s_axis_tvalid (chdr_pf_tvalid),
  .s_axis_tready (chdr_pf_tready),
  .m_axis_tdata  (m_axis_chdr_tdata),
  .m_axis_tlast  (m_axis_chdr_tlast),
  .m_axis_tvalid (m_axis_chdr_tvalid),
  .m_axis_tready (m_axis_chdr_tready)
);

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/core/axis_data_to_chdr.v b/fpga/usrp3/lib/rfnoc/core/axis_data_to_chdr.v
new file mode 100644
index 000000000..6a5b3ce05
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/core/axis_data_to_chdr.v
@@ -0,0 +1,452 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_data_to_chdr
//
// Description:
//
//   A framer module for CHDR data packets. It accepts an input data stream
//   (with sideband information for packet flags and timestamp). A CHDR packet
//   will be generated for each data packet that is input.
//
//   The sideband information (e.g., timestamp, flags) must be input coincident
//   with the AXI-Stream data input and will be sampled coincident with the
//   last word of data in the packet (i.e., when tlast is asserted).
//
//   This module also performs an optional clock crossing and data width
//   conversion from a user requested width for the payload bus to CHDR_W.
//
// Parameters:
//
//   CHDR_W         : Width of the output CHDR bus in bits
//   ITEM_W         : Width of the input item bus in bits
//   NIPC           : The number of input items accepted per cycle
//   SYNC_CLKS      : Are the CHDR and data clocks synchronous to each other?
//   MTU            : Log2 of the maximum packet size in CHDR words
//   INFO_FIFO_SIZE : Log2 of the info FIFO size. This determines the number of
//                    packets that can be simultaneously buffered in the
//                    payload FIFO.
//   PYLD_FIFO_SIZE : Log2 of the payload FIFO size. The actual FIFO size will
//                    be the maximum of 2**MTU or 2**PYLD_FIFO_SIZE, since the
//                    FIFO must be at least one MTU so that we can calculate
//                    the packet length in the header.
+// +// Signals: +// +// m_axis_chdr_* : Output CHDR stream +// s_axis_* : Input data stream (AXI-Stream) +// flush_* : Signals for flush control and status +// + +module axis_data_to_chdr #( + parameter CHDR_W = 256, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter SYNC_CLKS = 0, + parameter MTU = 10, + parameter INFO_FIFO_SIZE = 5, + parameter PYLD_FIFO_SIZE = MTU +)( + // Clock, reset and settings + input wire axis_chdr_clk, + input wire axis_chdr_rst, + input wire axis_data_clk, + input wire axis_data_rst, + // CHDR out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_chdr_tdata, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Payload data stream in (AXI-Stream) + input wire [(ITEM_W*NIPC)-1:0] s_axis_tdata, + input wire [NIPC-1:0] s_axis_tkeep, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + // Payload sideband info + input wire [63:0] s_axis_ttimestamp, + input wire s_axis_thas_time, + input wire s_axis_teov, + input wire s_axis_teob, + // Flush signals + input wire flush_en, + input wire [31:0] flush_timeout, + output wire flush_active, + output wire flush_done +); + + // Make sure the payload FIFO is large enough to store an entire packet's + // worth of payload data. This will ensure that we can buffer the entire + // packet to calculate its length. + localparam PAYLOAD_FIFO_SIZE = PYLD_FIFO_SIZE > MTU ? + PYLD_FIFO_SIZE : MTU; + + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + + //--------------------------------------------------------------------------- + // Timestamp and Flags Capture + //--------------------------------------------------------------------------- + // + // The timestamp and flags that we use for each packet is that of the last + // data word. 
Here, we capture this information at the end of the packet. + // + //--------------------------------------------------------------------------- + + reg [63:0] packet_timestamp; + reg packet_has_time; + reg packet_eov; + reg packet_eob; + + always @(posedge axis_data_clk) begin + if (s_axis_tvalid & s_axis_tready & s_axis_tlast) begin + packet_timestamp <= s_axis_ttimestamp; + packet_has_time <= s_axis_thas_time; + packet_eov <= s_axis_teov; + packet_eob <= s_axis_teob; + end + end + + + //--------------------------------------------------------------------------- + // Length Counters + //--------------------------------------------------------------------------- + // + // Here We track the state of the incoming packet to determine the payload + // length. + // + //--------------------------------------------------------------------------- + + localparam HDR_LEN = CHDR_W/8; // Length of CHDR header word in bytes + + reg [15:0] packet_length; + reg [15:0] length_count = HDR_LEN; + reg in_pkt_info_tvalid = 0; + wire in_pkt_info_tready; + + always @(posedge axis_data_clk) begin : pkt_length_counter + if (axis_data_rst) begin + length_count <= HDR_LEN; + in_pkt_info_tvalid <= 1'b0; + end else begin : pkt_length_counter_main + // Calculate the length of this word in bytes, taking tkeep into account + integer i; + integer num_bytes; + num_bytes = 0; + for (i = 0; i < NIPC; i = i + 1) begin + num_bytes = num_bytes + (s_axis_tkeep[i]*(ITEM_W/8)); + end + + // Update the packet length if the word is accepted + in_pkt_info_tvalid <= 1'b0; + if (s_axis_tvalid && s_axis_tready) begin + if (s_axis_tlast) begin + length_count <= HDR_LEN; + packet_length <= length_count + num_bytes; + in_pkt_info_tvalid <= 1'b1; + end else begin + length_count <= length_count + num_bytes; + end + end + end + end + + + //--------------------------------------------------------------------------- + // Data Width Converter (ITEM_W*NIPC => CHDR_W) + 
//--------------------------------------------------------------------------- + + wire [CHDR_W-1:0] in_pyld_tdata; + wire in_pyld_tlast; + wire in_pyld_tvalid; + wire in_pyld_tready; + wire width_conv_tready; + + assign width_conv_tready = in_pyld_tready & in_pkt_info_tready; + + generate + if (NIPC != CHDR_W/ITEM_W) begin : gen_axis_width_conv + axis_width_conv #( + .WORD_W (ITEM_W), + .IN_WORDS (NIPC), + .OUT_WORDS (CHDR_W/ITEM_W), + .SYNC_CLKS (1), + .PIPELINE ("IN") + ) payload_width_conv_i ( + .s_axis_aclk (axis_data_clk), + .s_axis_rst (axis_data_rst), + .s_axis_tdata (s_axis_tdata), + .s_axis_tkeep ({NIPC{1'b1}}), + .s_axis_tlast (s_axis_tlast), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + .m_axis_aclk (axis_data_clk), + .m_axis_rst (axis_data_rst), + .m_axis_tdata (in_pyld_tdata), + .m_axis_tkeep (), + .m_axis_tlast (in_pyld_tlast), + .m_axis_tvalid (in_pyld_tvalid), + .m_axis_tready (width_conv_tready) + ); + end else begin : no_gen_axis_width_conv + assign in_pyld_tdata = s_axis_tdata; + assign in_pyld_tlast = s_axis_tlast; + assign in_pyld_tvalid = s_axis_tvalid; + assign s_axis_tready = width_conv_tready; + end + endgenerate + + + //--------------------------------------------------------------------------- + // Input FIFOs + //--------------------------------------------------------------------------- + // + // Buffer the data, packet info, metadata, and cross it into the CHDR clock + // domain, if needed. The payload FIFO is sized to match the MTU so that an + // entire packet can be buffered while the length is calculated. 
+ // + //--------------------------------------------------------------------------- + + wire [CHDR_W-1:0] out_pyld_tdata; + wire out_pyld_tlast; + wire out_pyld_tvalid; + reg out_pyld_tready; + + wire out_pkt_info_tvalid; + reg out_pkt_info_tready; + wire out_eob, out_eov, out_has_time; + wire [63:0] out_timestamp; + wire [15:0] out_length; + + generate if (SYNC_CLKS) begin : gen_sync_fifo + axi_fifo #( + .WIDTH (CHDR_W+1), + .SIZE (PAYLOAD_FIFO_SIZE) + ) pyld_fifo ( + .clk (axis_chdr_clk), + .reset (axis_chdr_rst), + .clear (1'b0), + .i_tdata ({in_pyld_tlast, in_pyld_tdata}), + .i_tvalid (in_pyld_tvalid), + .i_tready (in_pyld_tready), + .o_tdata ({out_pyld_tlast, out_pyld_tdata}), + .o_tvalid (out_pyld_tvalid), + .o_tready (out_pyld_tready), + .space (), + .occupied () + ); + axi_fifo #( + .WIDTH (3 + 64 + 16), + .SIZE (INFO_FIFO_SIZE) + ) pkt_info_fifo ( + .clk (axis_chdr_clk), + .reset (axis_chdr_rst), + .clear (1'b0), + .i_tdata ({packet_eob, packet_eov, packet_has_time,packet_timestamp, packet_length}), + .i_tvalid (in_pkt_info_tvalid), + .i_tready (in_pkt_info_tready), + .o_tdata ({out_eob, out_eov, out_has_time, out_timestamp, out_length}), + .o_tvalid (out_pkt_info_tvalid), + .o_tready (out_pkt_info_tready), + .space (), + .occupied () + ); + + end else begin : gen_async_fifo + axi_fifo_2clk #( + .WIDTH (CHDR_W + 1), + .SIZE (PAYLOAD_FIFO_SIZE) + ) pyld_fifo ( + .reset (axis_data_rst), + .i_aclk (axis_data_clk), + .i_tdata ({in_pyld_tlast, in_pyld_tdata}), + .i_tvalid (in_pyld_tvalid), + .i_tready (in_pyld_tready), + .o_aclk (axis_chdr_clk), + .o_tdata ({out_pyld_tlast, out_pyld_tdata}), + .o_tvalid (out_pyld_tvalid), + .o_tready (out_pyld_tready) + ); + axi_fifo_2clk #( + .WIDTH (3 + 64 + 16), + .SIZE (INFO_FIFO_SIZE) + ) pkt_info_fifo ( + .reset (axis_data_rst), + .i_aclk (axis_data_clk), + .i_tdata ({packet_eob, packet_eov, packet_has_time,packet_timestamp, packet_length}), + .i_tvalid (in_pkt_info_tvalid), + .i_tready (in_pkt_info_tready), + .o_aclk 
(axis_chdr_clk), + .o_tdata ({out_eob, out_eov, out_has_time, out_timestamp, out_length}), + .o_tvalid (out_pkt_info_tvalid), + .o_tready (out_pkt_info_tready) + ); + end endgenerate + + + //--------------------------------------------------------------------------- + // Output State Machine + //--------------------------------------------------------------------------- + + reg [CHDR_W-1:0] chdr_pf_tdata; + reg chdr_pf_tlast, chdr_pf_tvalid; + wire chdr_pf_tready; + + localparam [1:0] ST_HDR = 0; // Processing the output CHDR header + localparam [1:0] ST_TS = 1; // Processing the output CHDR timestamp + localparam [1:0] ST_PYLD = 2; // Processing the output CHDR payload word + + reg [1:0] state = ST_HDR; + + reg [15:0] seq_num = 0; + + wire [63:0] header; + reg [63:0] timestamp; + wire [15:0] length; + + // Sum the payload, metadata, and timestamp lengths (out_length already + // includes the header). + assign length = (CHDR_W > 64) ? out_length : out_length + 8*out_has_time; + + // Build the header word + assign header = chdr_build_header( + 6'b0, // vc + out_eob, // eob + out_eov, // eov + out_has_time ? CHDR_PKT_TYPE_DATA_TS : + CHDR_PKT_TYPE_DATA, // pkt_type + 0, // num_mdata + seq_num, // seq_num + length, // length + 16'b0 // dst_epid + ); + + always @(posedge axis_chdr_clk) begin + if (axis_chdr_rst) begin + state <= ST_HDR; + seq_num <= 0; + end else begin + case (state) + + // ST_HDR: CHDR Header + // ------------------- + ST_HDR: begin + timestamp <= out_timestamp; + + if (out_pkt_info_tvalid && chdr_pf_tready) begin + seq_num <= seq_num + 1; + + if (CHDR_W > 64) begin + // When CHDR_W > 64, the timestamp is a part of the header word. + // If this is a data packet (with or without a TS), we skip the + // timestamp state and move directly to the payload. + state <= ST_PYLD; + end else begin + // When CHDR_W == 64, the timestamp comes after the header. Check + // if this is a data packet with a timestamp to figure out the + // next state. 
+ if (out_has_time) begin + state <= ST_TS; + end else begin + state <= ST_PYLD; + end + end + end + end + + // ST_TS: Timestamp (CHDR_W == 64 only) + // ------------------------------------ + ST_TS: begin + if (chdr_pf_tready) begin + state <= ST_PYLD; + end + end + + // ST_PYLD: Payload word + // --------------------- + ST_PYLD: begin + if (out_pyld_tvalid && out_pyld_tready && out_pyld_tlast) begin + state <= ST_HDR; + end + end + + default: begin + // We should never get here + state <= ST_HDR; + end + endcase + end + end + + always @(*) begin + case (state) + ST_HDR: begin + // Insert header word + chdr_pf_tdata = (CHDR_W > 64) ? { out_timestamp, header } : header; + chdr_pf_tvalid = out_pkt_info_tvalid; + chdr_pf_tlast = 1'b0; + out_pyld_tready = 1'b0; + out_pkt_info_tready = chdr_pf_tready; // Remove packet info word from FIFO + end + ST_TS: begin + // Insert timestamp + chdr_pf_tdata = timestamp; // Whole vector assigned (zero-extended) so no latch is inferred when CHDR_W > 64 + chdr_pf_tvalid = 1'b1; // Timestamp register is always valid in this state + chdr_pf_tlast = 1'b0; + out_pyld_tready = 1'b0; + out_pkt_info_tready = 1'b0; + end + ST_PYLD: begin + // Insert payload words + chdr_pf_tdata = out_pyld_tdata; + chdr_pf_tvalid = out_pyld_tvalid; + chdr_pf_tlast = out_pyld_tlast; + out_pyld_tready = chdr_pf_tready; + out_pkt_info_tready = 1'b0; + end + default: begin + chdr_pf_tdata = out_pyld_tdata; + chdr_pf_tvalid = 1'b0; + chdr_pf_tlast = 1'b0; + out_pyld_tready = 1'b0; + out_pkt_info_tready = 1'b0; + end + endcase + end + + + //--------------------------------------------------------------------------- + // Flushing Logic + //--------------------------------------------------------------------------- + + axis_packet_flush #( + .WIDTH (CHDR_W), + .FLUSH_PARTIAL_PKTS (0), + .TIMEOUT_W (32), + .PIPELINE ("IN") + ) chdr_flusher_i ( + .clk (axis_chdr_clk), + .reset (axis_chdr_rst), + .enable (flush_en), + .timeout (flush_timeout), + .flushing (flush_active), + .done (flush_done), + .s_axis_tdata (chdr_pf_tdata), + .s_axis_tlast 
(chdr_pf_tlast), + .s_axis_tvalid (chdr_pf_tvalid), + .s_axis_tready (chdr_pf_tready), + .m_axis_tdata (m_axis_chdr_tdata), + .m_axis_tlast (m_axis_chdr_tlast), + .m_axis_tvalid (m_axis_chdr_tvalid), + .m_axis_tready (m_axis_chdr_tready) + ); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/core/axis_pyld_ctxt_to_chdr.v b/fpga/usrp3/lib/rfnoc/core/axis_pyld_ctxt_to_chdr.v new file mode 100644 index 000000000..c73d7f365 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/axis_pyld_ctxt_to_chdr.v @@ -0,0 +1,463 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_pyld_ctxt_to_chdr +// Description: +// A header framer module for CHDR data packets. +// Accepts an input payload and context stream, and produces an +// output CHDR stream. +// This module also performs an optional clock crossing and data +// width conversion from a user requested width for the +// payload bus to CHDR_W. +// Context and data packets must be interleaved i.e. a context packet +// must arrive before its corresponding data packet. However, if +// context prefetching is enabled, the context for the next packet +// may arrive before the data for the current packet has been +// consumed. In the case of a rate reduction, this allows the module +// to sustain a gapless stream of payload items and a bursty +// sideband context path. +// +// Parameters: +// - CHDR_W: Width of the output CHDR bus in bits +// - ITEM_W: Width of the input item bus in bits +// - NIPC: The number of input items delivered per cycle +// - SYNC_CLKS: Are the CHDR and data clocks synchronous to each other? +// - CONTEXT_FIFO_SIZE: FIFO size for the context path +// - PAYLOAD_FIFO_SIZE: FIFO size for the payload path +// - MTU: Log2 of the maximum packet size in words +// - CONTEXT_PREFETCH_EN: Is context prefetching enabled? 
+// +// Signals: +// - s_axis_payload_* : Input payload stream (AXI-Stream) +// - s_axis_context_* : Input context stream (AXI-Stream) +// - m_axis_chdr_* : Output CHDR stream (AXI-Stream) +// - framer_errors : Number of framer errors (dropped packets) +// - flush_* : Signals for flush control and status +// + +module axis_pyld_ctxt_to_chdr #( + parameter CHDR_W = 256, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter SYNC_CLKS = 0, + parameter CONTEXT_FIFO_SIZE = 1, + parameter PAYLOAD_FIFO_SIZE = 1, + parameter MTU = 9, + parameter CONTEXT_PREFETCH_EN = 1 +)( + // Clock, reset and settings + input wire axis_chdr_clk, + input wire axis_chdr_rst, + input wire axis_data_clk, + input wire axis_data_rst, + // CHDR out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_chdr_tdata, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Payload stream in (AXI-Stream) + input wire [(ITEM_W*NIPC)-1:0] s_axis_payload_tdata, + input wire [NIPC-1:0] s_axis_payload_tkeep, + input wire s_axis_payload_tlast, + input wire s_axis_payload_tvalid, + output wire s_axis_payload_tready, + // Context stream in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_context_tdata, + input wire [3:0] s_axis_context_tuser, + input wire s_axis_context_tlast, + input wire s_axis_context_tvalid, + output wire s_axis_context_tready, + // Status + output reg [31:0] framer_errors, + // Flush signals + input wire flush_en, + input wire [31:0] flush_timeout, + output wire flush_active, + output wire flush_done +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + // --------------------------------------------------- + // Input State Machine + // --------------------------------------------------- + reg [2:0] ctxt_pkt_cnt = 3'd0, pyld_pkt_cnt = 3'd0; + // A payload packet can pass only if it 
is preceeded by a context packet + wire pass_pyld = ((ctxt_pkt_cnt - pyld_pkt_cnt) > 3'd0); + // A context packet has to be blocked if its corresponding payload packet hasn't passed except + // when prefetching is enabled. In that case one additional context packet is allowed to pass + wire pass_ctxt = ((ctxt_pkt_cnt - pyld_pkt_cnt) < (CONTEXT_PREFETCH_EN == 1 ? 3'd2 : 3'd1)); + + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + ctxt_pkt_cnt <= 3'd0; + pyld_pkt_cnt <= 3'd0; + end else begin + if (s_axis_context_tvalid && s_axis_context_tready && s_axis_context_tlast) + ctxt_pkt_cnt <= ctxt_pkt_cnt + 3'd1; + if (s_axis_payload_tvalid && s_axis_payload_tready && s_axis_payload_tlast) + pyld_pkt_cnt <= pyld_pkt_cnt + 3'd1; + end + end + + wire tmp_ctxt_tvalid, tmp_ctxt_tready; + wire tmp_pyld_tvalid, tmp_pyld_tready; + + assign tmp_ctxt_tvalid = s_axis_context_tvalid && pass_ctxt; + assign tmp_pyld_tvalid = s_axis_payload_tvalid && pass_pyld; + assign s_axis_context_tready = tmp_ctxt_tready && pass_ctxt; + assign s_axis_payload_tready = tmp_pyld_tready && pass_pyld; + + // --------------------------------------------------- + // Data Width Converter: ITEM_W*NIPC => CHDR_W + // --------------------------------------------------- + wire [CHDR_W-1:0] in_pyld_tdata; + wire in_pyld_tlast; + wire in_pyld_tvalid; + wire in_pyld_tready; + + axis_width_conv #( + .WORD_W(ITEM_W), .IN_WORDS(NIPC), .OUT_WORDS(CHDR_W/ITEM_W), + .SYNC_CLKS(1), .PIPELINE("IN") + ) payload_width_conv_i ( + .s_axis_aclk(axis_data_clk), .s_axis_rst(axis_data_rst), + .s_axis_tdata(s_axis_payload_tdata), + .s_axis_tkeep(s_axis_payload_tkeep), + .s_axis_tlast(s_axis_payload_tlast), + .s_axis_tvalid(tmp_pyld_tvalid), + .s_axis_tready(tmp_pyld_tready), + .m_axis_aclk(axis_data_clk), .m_axis_rst(axis_data_rst), + .m_axis_tdata(in_pyld_tdata), + .m_axis_tkeep(/* unused */), + .m_axis_tlast(in_pyld_tlast), + .m_axis_tvalid(in_pyld_tvalid), + .m_axis_tready(in_pyld_tready) + ); + + // 
--------------------------------------------------- + // Payload and Context FIFOs + // --------------------------------------------------- + wire [CHDR_W-1:0] out_ctxt_tdata , out_pyld_tdata ; + wire [3:0] out_ctxt_tuser; + wire out_ctxt_tlast , out_pyld_tlast ; + wire out_ctxt_tvalid, out_pyld_tvalid; + reg out_ctxt_tready, out_pyld_tready; + + generate if (SYNC_CLKS) begin + axi_fifo #(.WIDTH(CHDR_W+4+1), .SIZE(CONTEXT_FIFO_SIZE)) ctxt_fifo_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(1'b0), + .i_tdata({s_axis_context_tlast, s_axis_context_tuser, s_axis_context_tdata}), + .i_tvalid(tmp_ctxt_tvalid), .i_tready(tmp_ctxt_tready), + .o_tdata({out_ctxt_tlast, out_ctxt_tuser, out_ctxt_tdata}), + .o_tvalid(out_ctxt_tvalid), .o_tready(out_ctxt_tready), + .space(), .occupied() + ); + axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(1'b0), + .i_tdata({in_pyld_tlast, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), + .o_tdata({out_pyld_tlast, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready), + .space(), .occupied() + ); + end else begin + axi_fifo_2clk #(.WIDTH(CHDR_W+4+1), .SIZE(CONTEXT_FIFO_SIZE)) ctxt_fifo_i ( + .reset(axis_data_rst), + .i_aclk(axis_data_clk), + .i_tdata({s_axis_context_tlast, s_axis_context_tuser, s_axis_context_tdata}), + .i_tvalid(tmp_ctxt_tvalid), .i_tready(tmp_ctxt_tready), + .o_aclk(axis_chdr_clk), + .o_tdata({out_ctxt_tlast, out_ctxt_tuser, out_ctxt_tdata}), + .o_tvalid(out_ctxt_tvalid), .o_tready(out_ctxt_tready) + ); + axi_fifo_2clk #(.WIDTH(CHDR_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .reset(axis_data_rst), + .i_aclk(axis_data_clk), + .i_tdata({in_pyld_tlast, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), + .o_aclk(axis_chdr_clk), + .o_tdata({out_pyld_tlast, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready) + ); + end endgenerate + + // 
--------------------------------------------------- + // Output State Machine + // --------------------------------------------------- + wire [CHDR_W-1:0] chdr_pg_tdata; + reg chdr_pg_tlast, chdr_pg_tvalid; + wire chdr_pg_terror, chdr_pg_tready; + + localparam [2:0] ST_HDR = 3'd0; // Processing the output CHDR header + localparam [2:0] ST_TS = 3'd1; // Processing the output CHDR timestamp + localparam [2:0] ST_MDATA = 3'd2; // Processing the output CHDR metadata word + localparam [2:0] ST_BODY = 3'd3; // Processing the output CHDR payload word + localparam [2:0] ST_DROP_CTXT = 3'd4; // Something went wrong... Dropping context packet + localparam [2:0] ST_DROP_PYLD = 3'd5; // Something went wrong... Dropping payload packet + localparam [2:0] ST_TERMINATE = 3'd6; // Something went wrong... Rejecting output packet + + reg [2:0] state = ST_HDR; + reg [4:0] mdata_pending = 5'd0; + + // Shortcuts: CHDR header + wire [2:0] out_pkt_type = chdr_get_pkt_type(out_ctxt_tdata[63:0]); + wire [4:0] out_num_mdata = chdr_get_num_mdata(out_ctxt_tdata[63:0]); + + always @(posedge axis_chdr_clk) begin + if (axis_chdr_rst) begin + state <= ST_HDR; + framer_errors <= 32'd0; + end else begin + case (state) + + // ST_HDR: CHDR Header + // ------------------- + ST_HDR: begin + if (out_ctxt_tvalid && out_ctxt_tready) begin + mdata_pending <= out_num_mdata; + if (CHDR_W > 64) begin + // When CHDR_W > 64, the timestamp is a part of the header word. + // If this is a data packet (with/without a TS), we skip the TS state + // move directly to metadata/body + if (out_num_mdata != CHDR_NO_MDATA) begin + if (!out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_HDR_TS) + state <= ST_MDATA; // tlast should be low. Move to metadata. + else + state <= ST_DROP_CTXT; // Malformed packet: Wrong tuser. Drop ctxt+pyld + else + state <= ST_DROP_PYLD; // Premature tlast. 
Drop pyld + end else begin + if (out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_HDR_TS) + state <= ST_BODY; // tlast should be high. Move to payload. + else + state <= ST_DROP_PYLD; // Malformed packet: Wrong tuser. Drop pyld + else + state <= ST_DROP_CTXT; // Malformed packet: extra context lines. Drop ctxt+pyld + end + end else begin + // When CHDR_W == 64, the timestamp comes after the header. Check if this is a data + // packet with a TS to figure out the next state. If no TS, then check for metadata + // to move to the next state. Drop any non-data packets. + if (out_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + if (!out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_HDR) + state <= ST_TS; // tlast should be low. Move to timestamp. + else + state <= ST_DROP_CTXT; // Malformed packet: Wrong tuser. Drop ctxt+pyld + else + state <= ST_DROP_PYLD; // Premature tlast. Drop pyld + end else begin + if (out_num_mdata != CHDR_NO_MDATA) begin + if (!out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_HDR) + state <= ST_MDATA; // tlast should be low. Move to metadata. + else + state <= ST_DROP_CTXT; // Malformed packet: Wrong tuser. Drop ctxt+pyld + else + state <= ST_DROP_PYLD; // Premature tlast. Drop pyld + end else begin + if (out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_HDR) + state <= ST_BODY; // tlast should be high. Move to payload. + else + state <= ST_DROP_PYLD; // Malformed packet: Wrong tuser. Drop pyld + else + state <= ST_DROP_CTXT; // Malformed packet: extra context lines. Drop ctxt+pyld + end + end + end + end + end + + // ST_TS: Timestamp (CHDR_W == 64 only) + // ------------------------------------ + ST_TS: begin + if (out_ctxt_tvalid && out_ctxt_tready) begin + if (mdata_pending != CHDR_NO_MDATA) begin + if (!out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_TS) + state <= ST_MDATA; // tlast should be low. Move to metadata. + else + state <= ST_DROP_CTXT; // Malformed packet: Wrong tuser. 
Drop ctxt+pyld + else + state <= ST_DROP_PYLD; // Premature tlast. Drop pyld + end else begin + if (out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_TS) + state <= ST_BODY; // tlast should be high. Move to payload. + else + state <= ST_DROP_PYLD; // Malformed packet: Wrong tuser. Drop pyld + else + state <= ST_DROP_CTXT; // Malformed packet: extra context lines. Drop ctxt+pyld + end + end + end + + // ST_MDATA: Metadata word + // ----------------------- + ST_MDATA: begin + if (out_ctxt_tvalid && out_ctxt_tready) begin + if (mdata_pending != 5'd1) begin + mdata_pending <= mdata_pending - 'd1; + if (!out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_MDATA) + state <= ST_MDATA; // tlast should be low. Continue processing metadata. + else + state <= ST_DROP_CTXT; // Malformed packet: Wrong tuser. Drop ctxt+pyld + else + state <= ST_DROP_PYLD; // Premature tlast. Drop pyld + end else begin + if (out_ctxt_tlast) + if (out_ctxt_tuser == CONTEXT_FIELD_MDATA) + state <= ST_BODY; // tlast should be high. Move to payload. + else + state <= ST_DROP_PYLD; // Malformed packet: Wrong tuser. Drop pyld + else + state <= ST_DROP_CTXT; // Malformed packet: extra context lines. Drop ctxt+pyld + end + end + end + + // ST_BODY: Payload word + // --------------------- + ST_BODY: begin + if (out_pyld_tvalid && out_pyld_tready) begin + state <= out_pyld_tlast ? ST_HDR : ST_BODY; + end + end + + // ST_DROP_CTXT: Drop current context packet + // ----------------------------------------- + ST_DROP_CTXT: begin + if (out_ctxt_tvalid && out_ctxt_tready) begin + state <= out_ctxt_tlast ? ST_DROP_PYLD : ST_DROP_CTXT; + end + end + + // ST_DROP_PYLD: Drop current payload packet + // ----------------------------------------- + ST_DROP_PYLD: begin + if (out_pyld_tvalid && out_pyld_tready) begin + state <= out_pyld_tlast ? 
ST_TERMINATE : ST_DROP_PYLD; + end + end + + // ST_TERMINATE: Drop partial output packet + // ---------------------------------------- + ST_TERMINATE: begin + if (chdr_pg_tready) begin + state <= ST_HDR; + framer_errors <= framer_errors + 32'd1; + end + end + + default: begin + // We should never get here + state <= ST_HDR; + end + endcase + end + end + + + always @(*) begin + case (state) + ST_HDR: begin + // A context word passes fwd to the CHDR output + chdr_pg_tvalid = out_ctxt_tvalid; + chdr_pg_tlast = 1'b0; // tlast is inherited from the data stream + out_ctxt_tready = chdr_pg_tready; + out_pyld_tready = 1'b0; + end + ST_TS: begin + // A context word passes fwd to the CHDR output + chdr_pg_tvalid = out_ctxt_tvalid; + chdr_pg_tlast = 1'b0; // tlast is inherited from the data stream + out_ctxt_tready = chdr_pg_tready; + out_pyld_tready = 1'b0; + end + ST_MDATA: begin + // A context word passes fwd to the CHDR output + chdr_pg_tvalid = out_ctxt_tvalid; + chdr_pg_tlast = 1'b0; // tlast is inherited from the data stream + out_ctxt_tready = chdr_pg_tready; + out_pyld_tready = 1'b0; + end + ST_BODY: begin + // A payload word passes fwd to the CHDR output + chdr_pg_tvalid = out_pyld_tvalid; + chdr_pg_tlast = out_pyld_tlast; + out_ctxt_tready = 1'b0; + out_pyld_tready = chdr_pg_tready; + end + ST_DROP_CTXT: begin + // A context word is consumed without passing fwd + chdr_pg_tvalid = 1'b0; + chdr_pg_tlast = 1'b0; + out_ctxt_tready = 1'b1; + out_pyld_tready = 1'b0; + end + ST_DROP_PYLD: begin + // A payload word is consumed without passing fwd + chdr_pg_tvalid = 1'b0; + chdr_pg_tlast = 1'b0; + out_ctxt_tready = 1'b0; + out_pyld_tready = 1'b1; + end + ST_TERMINATE: begin + // A dummy word with a tlast and terror is passed fwd + // to evacuate the current packet from the packet_gate + chdr_pg_tvalid = 1'b1; + chdr_pg_tlast = 1'b1; + out_ctxt_tready = 1'b0; + out_pyld_tready = 1'b0; + end + default: begin + chdr_pg_tvalid = 1'b0; + chdr_pg_tlast = 1'b0; + out_ctxt_tready = 
1'b0; + out_pyld_tready = 1'b0; + end + endcase + end + + assign chdr_pg_tdata = (state == ST_BODY) ? out_pyld_tdata : out_ctxt_tdata; + assign chdr_pg_terror = (state == ST_TERMINATE); + + // --------------------------------------------------- + // Packet gate + // --------------------------------------------------- + + wire [CHDR_W-1:0] chdr_flush_tdata; + wire chdr_flush_tlast, chdr_flush_tvalid; + wire chdr_flush_terror, chdr_flush_tready; + + axis_packet_flush #( + .WIDTH(CHDR_W+1), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("IN") + ) chdr_flusher_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), + .enable(flush_en), .timeout(flush_timeout), + .flushing(flush_active), .done(flush_done), + .s_axis_tdata({chdr_pg_terror, chdr_pg_tdata}), .s_axis_tlast(chdr_pg_tlast), + .s_axis_tvalid(chdr_pg_tvalid), .s_axis_tready(chdr_pg_tready), + .m_axis_tdata({chdr_flush_terror, chdr_flush_tdata}), .m_axis_tlast(chdr_flush_tlast), + .m_axis_tvalid(chdr_flush_tvalid), .m_axis_tready(chdr_flush_tready) + ); + + axi_packet_gate #( .WIDTH(CHDR_W), .SIZE(MTU), .USE_AS_BUFF(0) ) out_gate_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(flush_active), + .i_tdata(chdr_flush_tdata), .i_tlast(chdr_flush_tlast), .i_terror(chdr_flush_terror), + .i_tvalid(chdr_flush_tvalid), .i_tready(chdr_flush_tready), + .o_tdata(m_axis_chdr_tdata), .o_tlast(m_axis_chdr_tlast), + .o_tvalid(m_axis_chdr_tvalid), .o_tready(m_axis_chdr_tready) + ); + +endmodule // axis_pyld_ctxt_to_chdr diff --git a/fpga/usrp3/lib/rfnoc/core/backend_iface.v b/fpga/usrp3/lib/rfnoc/core/backend_iface.v new file mode 100644 index 000000000..59429ea6b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/backend_iface.v @@ -0,0 +1,142 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: backend_iface +// Description: +// A noc_shell interface to the backend infrastructure +// + +module backend_iface #( + parameter [31:0] NOC_ID = 
32'h0, + parameter [5:0] NUM_DATA_I = 0, + parameter [5:0] NUM_DATA_O = 0, + parameter [5:0] CTRL_FIFOSIZE = 0, + parameter [7:0] CTRL_MAX_ASYNC_MSGS = 0, + parameter [5:0] MTU = 0 +)( + // Input clock + input wire rfnoc_chdr_clk, + input wire rfnoc_ctrl_clk, + // Backend interface (sync. to rfnoc_ctrl_clk) + input wire [511:0] rfnoc_core_config, + output wire [511:0] rfnoc_core_status, + // Output reset + output wire rfnoc_chdr_rst, + output wire rfnoc_ctrl_rst, + // Flush interface (sync. to rfnoc_chdr_clk) + output wire data_i_flush_en, + output wire [31:0] data_i_flush_timeout, + input wire [63:0] data_i_flush_active, + input wire [63:0] data_i_flush_done, + output wire data_o_flush_en, + output wire [31:0] data_o_flush_timeout, + input wire [63:0] data_o_flush_active, + input wire [63:0] data_o_flush_done +); + localparam RESET_LENGTH = 32; + + `include "rfnoc_backend_iface.vh" + + // ----------------------------------- + // CONFIG: Infrastructure => Block + // ----------------------------------- + wire [BEC_TOTAL_WIDTH-1:0] rfnoc_core_config_trim = rfnoc_core_config[BEC_TOTAL_WIDTH-1:0]; + + reg [31:0] flush_timeout_ctclk = 32'd0; + reg flush_en_ctclk = 1'b0; + reg soft_ctrl_rst_ctclk = 1'b0; + reg soft_chdr_rst_ctclk = 1'b0; + + // Register logic before synchronizer + always @(posedge rfnoc_ctrl_clk) begin + flush_timeout_ctclk <= rfnoc_core_config_trim[BEC_FLUSH_TIMEOUT_OFFSET +: BEC_FLUSH_TIMEOUT_WIDTH]; + flush_en_ctclk <= rfnoc_core_config_trim[BEC_FLUSH_EN_OFFSET +: BEC_FLUSH_EN_WIDTH ]; + soft_ctrl_rst_ctclk <= rfnoc_core_config_trim[BEC_SOFT_CTRL_RST_OFFSET +: BEC_SOFT_CTRL_RST_WIDTH]; + soft_chdr_rst_ctclk <= rfnoc_core_config_trim[BEC_SOFT_CHDR_RST_OFFSET +: BEC_SOFT_CHDR_RST_WIDTH]; + end + + // Synchronizer + wire [31:0] flush_timeout_chclk; + wire flush_en_chclk; + + // Note: We are using a synchronizer to cross the 32-bit timeout bus + // into a different clock domain. 
Typically we would use a 2clk FIFO + // but it's OK to have the bits unsynchronized here because the value + // is static and is set from SW long before it is actually used. + + synchronizer #(.WIDTH(33), .INITIAL_VAL(33'd0)) sync_ctrl_i ( + .clk(rfnoc_chdr_clk), .rst(1'b0), + .in({flush_en_ctclk, flush_timeout_ctclk}), + .out({flush_en_chclk, flush_timeout_chclk}) + ); + + // Synchronize the reset to the CHDR and CTRL clock domains, and extend the + // reset pulse to make it long enough for most IP to reset correctly. + + wire rfnoc_ctrl_rst_pulse; + wire rfnoc_chdr_rst_pulse; + + pulse_synchronizer #(.MODE("POSEDGE")) soft_ctrl_rst_sync_i ( + .clk_a(rfnoc_ctrl_clk), .rst_a(1'b0), .pulse_a(soft_ctrl_rst_ctclk), .busy_a(), + .clk_b(rfnoc_ctrl_clk), .pulse_b(rfnoc_ctrl_rst_pulse) + ); + + pulse_synchronizer #(.MODE("POSEDGE")) soft_chdr_rst_sync_i ( + .clk_a(rfnoc_ctrl_clk), .rst_a(1'b0), .pulse_a(soft_chdr_rst_ctclk), .busy_a(), + .clk_b(rfnoc_chdr_clk), .pulse_b(rfnoc_chdr_rst_pulse) + ); + + pulse_stretch_min #(.LENGTH(RESET_LENGTH)) soft_ctrl_rst_stretch_i ( + .clk(rfnoc_ctrl_clk), .rst(1'b0), + .pulse_in(rfnoc_ctrl_rst_pulse), .pulse_out(rfnoc_ctrl_rst) + ); + + pulse_stretch_min #(.LENGTH(RESET_LENGTH)) soft_chdr_rst_stretch_i ( + .clk(rfnoc_chdr_clk), .rst(1'b0), + .pulse_in(rfnoc_chdr_rst_pulse), .pulse_out(rfnoc_chdr_rst) + ); + + assign data_i_flush_timeout = flush_timeout_chclk; + assign data_o_flush_timeout = flush_timeout_chclk; + assign data_i_flush_en = flush_en_chclk; + assign data_o_flush_en = flush_en_chclk; + + // ----------------------------------- + // STATUS: Block => Infrastructure + // ----------------------------------- + + reg flush_active_chclk = 1'b0; + reg flush_done_chclk = 1'b0; + + // Register logic before synchronizer + wire flush_active_ctclk; + wire flush_done_ctclk; + + // Bit masks selecting only the ports that exist (bits [NUM_DATA_x-1:0]). + // A mask is used instead of a [NUM_DATA_x-1:0] part-select because that + // select becomes a reversed [-1:0] range when a block has no input or no + // output ports (NUM_DATA_x == 0). Absent ports count as "not active" and + // "done". + localparam [63:0] DATA_I_MASK = ~({64{1'b1}} << NUM_DATA_I); + localparam [63:0] DATA_O_MASK = ~({64{1'b1}} << NUM_DATA_O); + + always @(posedge rfnoc_chdr_clk) begin + flush_active_chclk <= (|(data_i_flush_active & DATA_I_MASK)) | (|(data_o_flush_active & DATA_O_MASK)); + flush_done_chclk 
<= (&(data_i_flush_done | ~DATA_I_MASK)) & (&(data_o_flush_done | ~DATA_O_MASK)); + end + + // Synchronizer + synchronizer #(.WIDTH(2), .INITIAL_VAL(2'd0)) sync_status_i ( + .clk(rfnoc_ctrl_clk), .rst(1'b0), + .in({flush_active_chclk, flush_done_chclk}), + .out({flush_active_ctclk, flush_done_ctclk}) + ); + + assign rfnoc_core_status[BES_PROTO_VER_OFFSET +:BES_PROTO_VER_WIDTH ] = BACKEND_PROTO_VER; + assign rfnoc_core_status[BES_NUM_DATA_I_OFFSET +:BES_NUM_DATA_I_WIDTH ] = NUM_DATA_I; + assign rfnoc_core_status[BES_NUM_DATA_O_OFFSET +:BES_NUM_DATA_O_WIDTH ] = NUM_DATA_O; + assign rfnoc_core_status[BES_CTRL_FIFOSIZE_OFFSET +:BES_CTRL_FIFOSIZE_WIDTH ] = CTRL_FIFOSIZE; + assign rfnoc_core_status[BES_CTRL_MAX_ASYNC_MSGS_OFFSET+:BES_CTRL_MAX_ASYNC_MSGS_WIDTH] = CTRL_MAX_ASYNC_MSGS; + assign rfnoc_core_status[BES_NOC_ID_OFFSET +:BES_NOC_ID_WIDTH ] = NOC_ID; + assign rfnoc_core_status[BES_FLUSH_ACTIVE_OFFSET +:BES_FLUSH_ACTIVE_WIDTH ] = flush_active_ctclk; + assign rfnoc_core_status[BES_FLUSH_DONE_OFFSET +:BES_FLUSH_DONE_WIDTH ] = flush_done_ctclk; + assign rfnoc_core_status[BES_DATA_MTU_OFFSET +:BES_DATA_MTU_WIDTH ] = MTU; + // Assign the rest to 0 + assign rfnoc_core_status[511:BES_TOTAL_WIDTH] = {(512-BES_TOTAL_WIDTH){1'b0}}; + +endmodule // backend_iface + diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_compute_tkeep.v b/fpga/usrp3/lib/rfnoc/core/chdr_compute_tkeep.v new file mode 100644 index 000000000..72c5bab13 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_compute_tkeep.v @@ -0,0 +1,86 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_compute_tkeep +// Description: +// This module monitors an AXI-Stream CHDR bus and uses the +// packet size field in the CHDR header to compute a tkeep +// trailer signal to indicate the valid bytes when +// tlast is asserted. 
+// +// Parameters: +// - CHDR_W: Width of the CHDR bus in bits +// - ITEM_W: Width of the item bus in bits (must be a multiple of 8) +// +// Signals: +// - axis_* : AXI-Stream CHDR bus + +module chdr_compute_tkeep #( + parameter CHDR_W = 256, + parameter ITEM_W = 32 +)( + input wire clk, + input wire rst, + input wire [CHDR_W-1:0] axis_tdata, + input wire axis_tlast, + input wire axis_tvalid, + input wire axis_tready, + output wire [(CHDR_W/ITEM_W)-1:0] axis_tkeep +); + + `include "rfnoc_chdr_utils.vh" + + generate if (CHDR_W > ITEM_W) begin + + localparam CHDR_W_BYTES = CHDR_W/8; + localparam ITEM_W_BYTES = ITEM_W/8; + localparam KEEP_W = CHDR_W_BYTES/ITEM_W_BYTES; + + // Binary to thermometer decoder + // 2'd0 => 4'b1111 (special case) + // 2'd1 => 4'b0001 + // 2'd2 => 4'b0011 + // 2'd3 => 4'b0111 + function [KEEP_W-1:0] bin2thermo; + input [$clog2(KEEP_W)-1:0] bin; + bin2thermo = ~((~1)<<((bin-1)%KEEP_W)); + endfunction + + // Read the packet length and figure out the number + // of trailing items + wire [15:0] pkt_len = chdr_get_length(axis_tdata[63:0]); + wire [KEEP_W-1:0] len_thermo = bin2thermo(pkt_len[$clog2(CHDR_W_BYTES)-1:$clog2(ITEM_W_BYTES)]); + reg [KEEP_W-1:0] reg_len_thermo = 'h0; + reg is_header = 1'b1; + + always @(posedge clk) begin + if (rst) begin + is_header <= 1'b1; + end else if (axis_tvalid & axis_tready) begin + is_header <= axis_tlast; + if (is_header) begin + reg_len_thermo <= len_thermo; + end + end + end + + // tkeep indicates trailing items, so for lines with tlast == 0, + // tkeep is all 1's. + assign axis_tkeep = (~axis_tlast) ? {KEEP_W{1'b1}} : + (is_header ? len_thermo : reg_len_thermo); + + end else if (CHDR_W == ITEM_W) begin + + // Only one item per CHDR word. So always keep it. + assign axis_tkeep = 1'b1; + + end else begin + + // Illegal. 
An item cannot be larger than CHDR_W + illegal_parameter_value item_w_cannot_be_larger_than_chdr_w(); + + end endgenerate + +endmodule // chdr_compute_tkeep diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_data_swapper.v b/fpga/usrp3/lib/rfnoc/core/chdr_data_swapper.v new file mode 100644 index 000000000..9b5e96bca --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_data_swapper.v @@ -0,0 +1,227 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_data_swapper +// Description: +// A module to adapt a CHDR stream to correctly sequence in +// a software buffer of a user-specified type. Here are +// the swapping assumptions: +// - The CHDR header, timestamp and metadata for all packet +// types must be interpreted as a uint64_t. +// - All Control, Stream Status/Cmd, Management packet payloads +// must reside in a uint64_t* buffer. +// - The buffer type for the data packet payload and metadata +// is user configurable +// +// Parameters: +// - CHDR_W: Width of the tdata bus in bits +// +// Signals: +// - payload_sw_buff: SW buffer mode for payload (0=u64, 1=u32, 2=u16, 3=u8) +// - mdata_sw_buff : SW buffer mode for metadata (0=u64, 1=u32, 2=u16, 3=u8) +// - s_axis_* : The input AXI stream +// - m_axis_* : The output AXI stream +// + +module chdr_data_swapper #( + parameter CHDR_W = 256 +)( + // Clock and Reset + input wire clk, + input wire rst, + // Software Buffer Mode + input wire [1:0] payload_sw_buff, + input wire [1:0] mdata_sw_buff, + input wire swap_endianness, + // Input AXIS + input wire [CHDR_W-1:0] s_axis_tdata, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + // Output AXIS + output wire [CHDR_W-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready +); + + `include "../core/rfnoc_chdr_utils.vh" + + // *_sw_buff values + localparam [1:0] SW_BUFF_UINT64 = 2'd0; + localparam [1:0] 
SW_BUFF_UINT32 = 2'd1; + localparam [1:0] SW_BUFF_UINT16 = 2'd2; + localparam [1:0] SW_BUFF_UINT8 = 2'd3; + + localparam SWAP_W = $clog2(CHDR_W); + + // Packet states + localparam [2:0] ST_HDR = 3'd0; + localparam [2:0] ST_TS = 3'd1; + localparam [2:0] ST_MDATA = 3'd2; + localparam [2:0] ST_DATA_BODY = 3'd3; + localparam [2:0] ST_OTHER = 3'd4; + + reg [2:0] state = ST_HDR; + reg [4:0] mdata_pending = CHDR_NO_MDATA; + reg [SWAP_W-2:0] pyld_tswap = 'h0, mdata_tswap = 'h0; + + // Shortcuts: CHDR header + wire [2:0] pkt_type = chdr_get_pkt_type(s_axis_tdata[63:0]); + wire [4:0] num_mdata = chdr_get_num_mdata(s_axis_tdata[63:0]); + + // State machine to determine packet state + always @(posedge clk) begin + if (rst) begin + state <= ST_HDR; + end else if (s_axis_tvalid & s_axis_tready) begin + case (state) + ST_HDR: begin + mdata_pending <= num_mdata; + if (!s_axis_tlast) begin + if (CHDR_W > 64) begin + if (pkt_type == CHDR_PKT_TYPE_DATA || pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + if (num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_DATA_BODY; + end + end else begin + state <= ST_OTHER; + end + end else begin + if (pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + state <= ST_TS; + end else if (pkt_type == CHDR_PKT_TYPE_DATA) begin + if (num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_DATA_BODY; + end + end else begin + state <= ST_OTHER; + end + end + end else begin + state <= ST_HDR; + end + end + ST_TS: begin + if (!s_axis_tlast) begin + if (mdata_pending != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_DATA_BODY; + end + end else begin + state <= ST_HDR; + end + end + ST_MDATA: begin + if (!s_axis_tlast) begin + if (mdata_pending == 5'd1) begin + state <= ST_DATA_BODY; + end else begin + mdata_pending <= mdata_pending - 5'd1; + end + end else begin + state <= ST_HDR; + end + end + ST_DATA_BODY: begin + if (s_axis_tlast) begin + state <= ST_HDR; + end + end + ST_OTHER: 
begin + if (s_axis_tlast) begin + state <= ST_HDR; + end + end + default: begin + state <= ST_HDR; + end + endcase + end + end + + // Convert SW buff size to swap-lane map + always @(posedge clk) begin + pyld_tswap <= 'h0; + mdata_tswap <= 'h0; + case (payload_sw_buff) + SW_BUFF_UINT8: + pyld_tswap[4:2] <= 3'b111; + SW_BUFF_UINT16: + pyld_tswap[4:2] <= 3'b110; + SW_BUFF_UINT32: + pyld_tswap[4:2] <= 3'b100; + default: + pyld_tswap[4:2] <= 3'b000; + endcase + case (mdata_sw_buff) + SW_BUFF_UINT8: + mdata_tswap[4:2] <= 3'b111; + SW_BUFF_UINT16: + mdata_tswap[4:2] <= 3'b110; + SW_BUFF_UINT32: + mdata_tswap[4:2] <= 3'b100; + default: + mdata_tswap[4:2] <= 3'b000; + endcase + end + + wire [SWAP_W-2:0] s_axis_tswap_dyn = + (state == ST_DATA_BODY) ? pyld_tswap : ( + (state == ST_MDATA) ? mdata_tswap : {(SWAP_W-1){1'b0}} + ); + wire s_axis_tswap_end = swap_endianness && + (state == ST_DATA_BODY || state == ST_MDATA); + + // Swapper that re-aligns items in a buffer for software + wire [CHDR_W-1:0] out_swap_tdata, out_swap_tdata_pre; + wire out_swap_tswap_end, out_swap_tlast, out_swap_tvalid, out_swap_tready; + + axis_data_swap #( + .DATA_W(CHDR_W), .USER_W(1'b1), + .STAGES_EN({{(SWAP_W-6){1'b0}}, 6'b111100}), .DYNAMIC(1) + ) chdr_dyn_swap_i ( + .clk (clk ), + .rst (rst ), + .s_axis_tdata (s_axis_tdata ), + .s_axis_tswap (s_axis_tswap_dyn ), + .s_axis_tuser (s_axis_tswap_end ), + .s_axis_tlast (s_axis_tlast ), + .s_axis_tvalid(s_axis_tvalid ), + .s_axis_tready(s_axis_tready ), + .m_axis_tdata (out_swap_tdata_pre), + .m_axis_tuser (out_swap_tswap_end), + .m_axis_tlast (out_swap_tlast ), + .m_axis_tvalid(out_swap_tvalid ), + .m_axis_tready(out_swap_tready ) + ); + + // Swapper that pre-corrects for transport endianness + genvar i; + generate for (i = 0; i < CHDR_W/8; i=i+1) begin + assign out_swap_tdata[i*8 +: 8] = out_swap_tswap_end ? 
+ out_swap_tdata_pre[((CHDR_W/8)-i-1)*8 +: 8] : out_swap_tdata_pre[i*8 +: 8]; + end endgenerate + + axi_fifo_flop2 #(.WIDTH(CHDR_W+1)) out_reg_i ( + .clk (clk ), + .reset (rst ), + .clear (1'b0 ), + .i_tdata ({out_swap_tlast, out_swap_tdata}), + .i_tvalid(out_swap_tvalid ), + .i_tready(out_swap_tready ), + .o_tdata ({m_axis_tlast, m_axis_tdata} ), + .o_tvalid(m_axis_tvalid ), + .o_tready(m_axis_tready ), + .occupied( ), + .space ( ) + ); + +endmodule // chdr_data_swapper diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_ingress_fifo.v b/fpga/usrp3/lib/rfnoc/core/chdr_ingress_fifo.v new file mode 100644 index 000000000..e2660426f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_ingress_fifo.v @@ -0,0 +1,95 @@ +// +// Copyright 2016 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module chdr_ingress_fifo #( + parameter WIDTH = 64, + parameter SIZE = 12, + parameter DEVICE = "7SERIES" +) ( + input clk, + input reset, + input clear, + + input [WIDTH-1:0] i_tdata, + input i_tlast, + input i_tvalid, + output i_tready, + + output [WIDTH-1:0] o_tdata, + output o_tlast, + output o_tvalid, + input o_tready +); + + localparam SIZE_THRESHOLD = ( + (DEVICE == "7SERIES") ? 14 : ( + (DEVICE == "VIRTEX6") ? 14 : ( + (DEVICE == "SPARTAN6") ? 
12 : ( + 12 + )))); + + wire [WIDTH-1:0] i_tdata_pre; + wire i_tlast_pre, i_tvalid_pre, i_tready_pre; + + // SRL based FIFO to break timing paths to BRAM resources + axi_fifo_flop2 #(.WIDTH(WIDTH+1)) pre_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast, i_tdata}), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata({i_tlast_pre, i_tdata_pre}), .o_tvalid(i_tvalid_pre), .o_tready(i_tready_pre), + .space(), .occupied() + ); + + generate + if (SIZE <= SIZE_THRESHOLD) begin + wire [WIDTH-1:0] o_tdata_int; + wire o_tlast_int, o_tvalid_int, o_tready_int; + // Instantiate a single axi_fifo if size is not larger than threshold + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(SIZE)) main_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({i_tlast_pre, i_tdata_pre}), .i_tvalid(i_tvalid_pre), .i_tready(i_tready_pre), + .o_tdata({o_tlast_int, o_tdata_int}), .o_tvalid(o_tvalid_int), .o_tready(o_tready_int), + .space(), .occupied() + ); + axi_fifo_flop2 #(.WIDTH(WIDTH+1)) fifo_flop2 ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({o_tlast_int, o_tdata_int}), .i_tvalid(o_tvalid_int), .i_tready(o_tready_int), + .o_tdata({o_tlast, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready), + .space(), .occupied() + ); + end else begin + // Instantiate a cascade of axi_fifos if size is larger than threshold + localparam CDEPTH = 2**(SIZE - SIZE_THRESHOLD); //Cascade Depth + wire [WIDTH-1:0] c_tdata[CDEPTH:0], int_tdata[CDEPTH-1:0]; + wire c_tlast[CDEPTH:0], c_tvalid[CDEPTH:0], c_tready[CDEPTH:0]; + wire int_tlast[CDEPTH-1:0], int_tvalid[CDEPTH-1:0], int_tready[CDEPTH-1:0]; + + //Connect input to first cascade state + assign {c_tdata[0], c_tlast[0], c_tvalid[0]} = {i_tdata_pre, i_tlast_pre, i_tvalid_pre}; + assign i_tready_pre = c_tready[0]; + //Connect output to last cascade state + assign {o_tdata, o_tlast, o_tvalid} = {c_tdata[CDEPTH], c_tlast[CDEPTH], c_tvalid[CDEPTH]}; + assign c_tready[CDEPTH] = o_tready; + + genvar i; + for (i=0; i<CDEPTH; i=i+1) begin: 
fifo_stages + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(SIZE_THRESHOLD)) main_fifo ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({c_tlast[i], c_tdata[i]}), .i_tvalid(c_tvalid[i]), .i_tready(c_tready[i]), + .o_tdata({int_tlast[i], int_tdata[i]}), .o_tvalid(int_tvalid[i]), .o_tready(int_tready[i]), + .space(), .occupied() + ); + axi_fifo_flop2 #(.WIDTH(WIDTH+1)) fifo_flop2 ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({int_tlast[i], int_tdata[i]}), .i_tvalid(int_tvalid[i]), .i_tready(int_tready[i]), + .o_tdata({c_tlast[i+1], c_tdata[i+1]}), .o_tvalid(c_tvalid[i+1]), .o_tready(c_tready[i+1]), + .space(), .occupied() + ); + end + end + endgenerate + +endmodule // chdr_ingress_fifo diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_mgmt_pkt_handler.v b/fpga/usrp3/lib/rfnoc/core/chdr_mgmt_pkt_handler.v new file mode 100644 index 000000000..f9c56a6e1 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_mgmt_pkt_handler.v @@ -0,0 +1,617 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_mgmt_pkt_handler +// Description: +// This module sits inline on a CHDR stream and adds a management +// node that is discoverable and configurable by software. As a +// management node, it provides a control-port master to configure any slave. +// The output CHDR stream has an additional tdest and tid which can +// be used to make routing decisions for management packets only. +// tid will be CHDR_MGMT_ROUTE_TDEST when tdest should be used. 
+// +// Parameters: +// - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>} +// - CHDR_W: Width of the CHDR bus in bits +// - USER_W: Width of the user/data bits that accompany an advertisement op +// - RESP_FIFO_SIZE: Log2 of the depth of the response FIFO +// Maximum value = 8 +// +// Signals: +// - s_axis_chdr_* : Input CHDR stream (AXI-Stream) +// - m_axis_chdr_* : Output CHDR stream (AXI-Stream) +// - node_info: Info about the node that contains this management slave +// - ctrlport_* : Control-port master for management peripheral +// - op_*: Strobe and info signals for a mgmt advertisement + +module chdr_mgmt_pkt_handler #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter CHDR_W = 256, + parameter USER_W = 1, + parameter [0:0] MGMT_ONLY = 0, + parameter RESP_FIFO_SIZE = 5 +)( + // Clock, reset and settings + input wire clk, + input wire rst, + // Node Info + input wire [47:0] node_info, + // CHDR Data In (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_chdr_tdata, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + input wire [USER_W-1:0] s_axis_chdr_tuser, + // CHDR Data Out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_chdr_tdata, + output wire [1:0] m_axis_chdr_tid, // Routing mode. 
Values defined in rfnoc_chdr_internal_utils.vh + output wire [9:0] m_axis_chdr_tdest, // Manual routing destination (only valid for tid = CHDR_MGMT_ROUTE_TDEST) + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Control port endpoint + output reg ctrlport_req_wr, + output reg ctrlport_req_rd, + output reg [15:0] ctrlport_req_addr, + output reg [31:0] ctrlport_req_data, + input wire ctrlport_resp_ack, + input wire [31:0] ctrlport_resp_data, + // Mgmt packet advertisement strobe + output wire [USER_W-1:0] op_data, + output wire op_stb, + output wire [15:0] op_dst_epid, + output wire [15:0] op_src_epid +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_chdr_internal_utils.vh" + + // --------------------------------------------------- + // Instantiate input demux and output mux to allow + // non-management packets to be bypassed + // --------------------------------------------------- + + localparam CHDR_W_BYTES = CHDR_W / 8; + localparam LOG2_CHDR_W_BYTES = $clog2(CHDR_W_BYTES); + + wire [CHDR_W-1:0] s_mgmt_tdata, m_mgmt_tdata; + wire [USER_W-1:0] s_mgmt_tuser; + wire [9:0] m_mgmt_tdest; + wire [1:0] m_mgmt_tid; + wire s_mgmt_tlast, s_mgmt_tvalid, s_mgmt_tready; + wire m_mgmt_tlast, m_mgmt_tvalid, m_mgmt_tready; + + generate if (!MGMT_ONLY) begin + // Instantiate MUX and DEMUX to segregate management and non-management packets. + // Management packets go to the main state machine, all others get bypassed to + // the output. + wire [CHDR_W-1:0] bypass_tdata; + wire [9:0] bypass_tdest; + wire [1:0] bypass_tid; + wire bypass_tlast, bypass_tvalid, bypass_tready; + wire [CHDR_W-1:0] s_header; + + // We consume the management packet only if it is actually a management packet and we + // don't know where it's going. 
If the packet has a valid EPID, it is a response that + // is capable of being routed. + wire consume_mgmt_pkt = (chdr_get_pkt_type(s_header[63:0]) == CHDR_PKT_TYPE_MGMT) && + (chdr_get_dst_epid(s_header[63:0]) == NULL_EPID); + + axi_demux #( + .WIDTH(CHDR_W), .SIZE(2), .PRE_FIFO_SIZE(1), .POST_FIFO_SIZE(0) + ) mgmt_demux_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .header(s_header), .dest(consume_mgmt_pkt ? 1'b1 : 1'b0), + .i_tdata(s_axis_chdr_tdata), .i_tlast(s_axis_chdr_tlast), + .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready), + .o_tdata({s_mgmt_tdata, bypass_tdata}), .o_tlast({s_mgmt_tlast, bypass_tlast}), + .o_tvalid({s_mgmt_tvalid, bypass_tvalid}), .o_tready({s_mgmt_tready, bypass_tready}) + ); + + // Only one cycle of delay, so can skip past the demux with the tuser bits + // Packets are longer than the latency through the axi_demux + assign s_mgmt_tuser = s_axis_chdr_tuser; + + assign {bypass_tid, bypass_tdest} = {CHDR_MGMT_ROUTE_EPID, 10'h0}; + + axi_mux #( + .WIDTH(CHDR_W+10+2), .SIZE(2), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) mgmt_mux_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({m_mgmt_tid, m_mgmt_tdest, m_mgmt_tdata, bypass_tid, bypass_tdest, bypass_tdata}), + .i_tlast({m_mgmt_tlast, bypass_tlast}), + .i_tvalid({m_mgmt_tvalid, bypass_tvalid}), .i_tready({m_mgmt_tready, bypass_tready}), + .o_tdata({m_axis_chdr_tid, m_axis_chdr_tdest, m_axis_chdr_tdata}), + .o_tlast(m_axis_chdr_tlast), + .o_tvalid(m_axis_chdr_tvalid), .o_tready(m_axis_chdr_tready) + ); + end else begin + // We are assuming that only management packets come into this module so we don't + // instantiate a bypass path to save resources. 
+ assign s_mgmt_tdata = s_axis_chdr_tdata; + assign s_mgmt_tlast = s_axis_chdr_tlast; + assign s_mgmt_tvalid = s_axis_chdr_tvalid; + assign s_mgmt_tuser = s_axis_chdr_tuser; + assign s_axis_chdr_tready = s_mgmt_tready; + + assign m_axis_chdr_tdata = m_mgmt_tdata; + assign m_axis_chdr_tdest = m_mgmt_tdest; + assign m_axis_chdr_tid = m_mgmt_tid; + assign m_axis_chdr_tlast = m_mgmt_tlast; + assign m_axis_chdr_tvalid = m_mgmt_tvalid; + assign m_mgmt_tready = m_axis_chdr_tready; + end endgenerate + + // --------------------------------------------------- + // Convert management packets to 64-bit + // For CHDR_W > 64, only the bottom 64 bits are used + // --------------------------------------------------- + wire [63:0] i64_tdata; + wire [USER_W-1:0] i64_tuser; + wire i64_tlast, i64_tvalid; + reg i64_tready; + reg [63:0] o64_tdata; + reg [9:0] o64_tdest; + reg [1:0] o64_tid; + reg o64_tlast, o64_tvalid; + wire o64_tready; + + axi_fifo #(.WIDTH(USER_W+65), .SIZE(1)) in_flop_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({s_mgmt_tuser, s_mgmt_tlast, s_mgmt_tdata[63:0]}), + .i_tvalid(s_mgmt_tvalid), .i_tready(s_mgmt_tready), + .o_tdata({i64_tuser, i64_tlast, i64_tdata}), + .o_tvalid(i64_tvalid), .o_tready(i64_tready), + .space(), .occupied() + ); + + axi_fifo #(.WIDTH(64+10+2+1), .SIZE(1)) out_flop_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({o64_tlast, o64_tdest, o64_tid, o64_tdata}), + .i_tvalid(o64_tvalid), .i_tready(o64_tready), + .o_tdata({m_mgmt_tlast, m_mgmt_tdest, m_mgmt_tid, m_mgmt_tdata[63:0]}), + .o_tvalid(m_mgmt_tvalid), .o_tready(m_mgmt_tready), + .space(), .occupied() + ); + + generate + if (CHDR_W > 64) + assign m_mgmt_tdata[CHDR_W-1:64] = 'h0; // Zero every bit above the 64-bit word driven by out_flop_i (no undriven bits for CHDR_W > 128) + endgenerate + + // --------------------------------------------------- + // Parse management packet + // --------------------------------------------------- + localparam [3:0] ST_CHDR_IN_HDR = 4'd0; // Consuming input CHDR header + localparam [3:0] ST_CHDR_IN_MDATA = 4'd1; // Discarding 
input CHDR metadata + localparam [3:0] ST_MGMT_IN_HDR = 4'd2; // Consuming input management header + localparam [3:0] ST_MGMT_OP_EXEC = 4'd3; // Management operation started + localparam [3:0] ST_MGMT_OP_WAIT = 4'd4; // Waiting for management op to finish + localparam [3:0] ST_MGMT_OP_DONE = 4'd5; // Consuming management op line + localparam [3:0] ST_CHDR_OUT_HDR = 4'd6; // Outputing a CHDR header + localparam [3:0] ST_MGMT_OUT_HDR = 4'd7; // Outputing a managment header + localparam [3:0] ST_PASS_PAYLOAD = 4'd8; // Passing payload for downstream hops + localparam [3:0] ST_MOD_LAST_HOP = 4'd9; // Processing last hop + localparam [3:0] ST_POP_RESPONSE = 4'd10; // Popping response from response FIFO + localparam [3:0] ST_APPEND_LAST_HOP = 4'd11; // Appending response to last hop + localparam [3:0] ST_FAILSAFE_DROP = 4'd12; // Something went wrong. Flushing input. + + // Pieces of state maintained by this state machine + reg [3:0] pkt_state = ST_CHDR_IN_HDR; // The state variable + reg [4:0] num_mdata; // Number of metadata lines in packet + reg [63:0] cached_chdr_hdr, cached_mgmt_hdr; // Cached copies of the CHDR and mgmt headers + reg [15:0] stripped_len; // The new CHDR length after ops are stripped + reg [9:0] hops_remaining; // Number of hops remaining until pkt is consumed + reg [7:0] resp_op_code; // Opcode for the response + reg [47:0] resp_op_payload; // Payload for the response + reg [USER_W-1:0] cached_tuser; // Cached copy of the tuser bits (for the advertise op) + + // Shortcuts + wire [7:0] op_code = chdr_mgmt_get_op_code(i64_tdata); + wire [47:0] op_payload = chdr_mgmt_get_op_payload(i64_tdata); + + // Inputs and outputs for the response FIFO + wire [55:0] resp_i_tdata, resp_o_tdata; + wire resp_i_tvalid, resp_o_tvalid; + wire [7:0] num_resp_pending; + + // The massive state machine + // ------------------------- + always @(posedge clk) begin + if (rst) begin + // We just need to initialize pkt_state here. 
+ // All other registers are initialized in states before their usage + pkt_state <= ST_CHDR_IN_HDR; + end else begin + case (pkt_state) + + // ST_CHDR_IN_HDR + // ------------------ + // - Cache and consume the CHDR header. It will be modified + // later before the packet is sent out. + // - Initialize CHDR specific state + ST_CHDR_IN_HDR: begin + if (i64_tvalid && i64_tready) begin + cached_chdr_hdr <= i64_tdata; + cached_tuser <= i64_tuser; + stripped_len <= chdr_get_length(i64_tdata); + num_mdata <= chdr_get_num_mdata(i64_tdata) - 5'd1; + if (!i64_tlast) begin + if (chdr_get_pkt_type(i64_tdata) != CHDR_PKT_TYPE_MGMT) + pkt_state <= ST_FAILSAFE_DROP; // Drop non-mgmt packets + else if (chdr_get_num_mdata(i64_tdata) != CHDR_NO_MDATA) + pkt_state <= ST_CHDR_IN_MDATA; // Skip over metadata + else + pkt_state <= ST_MGMT_IN_HDR; // Start processing packet + end else begin + pkt_state <= ST_CHDR_IN_HDR; // Premature termination + end + end + end + + // ST_CHDR_IN_MDATA + // ------------------ + // - Discard incoming CHDR metadata + ST_CHDR_IN_MDATA: begin + if (i64_tvalid && i64_tready) begin + num_mdata <= num_mdata - 5'd1; + if (!i64_tlast) + pkt_state <= (num_mdata == CHDR_NO_MDATA) ? ST_MGMT_IN_HDR : ST_CHDR_IN_MDATA; + else + pkt_state <= ST_CHDR_IN_HDR; // Premature termination + end + end + + // ST_MGMT_IN_HDR + // ------------------ + // - Cache and consume the managment header. It will be modified + // later before the packet is sent out. + // - Initialize management specific state + ST_MGMT_IN_HDR: begin + if (i64_tvalid && i64_tready) begin + cached_mgmt_hdr <= i64_tdata; + hops_remaining <= chdr_mgmt_get_num_hops(i64_tdata); + pkt_state <= (!i64_tlast) ? 
ST_MGMT_OP_EXEC : ST_CHDR_IN_HDR; + end + end + + // ST_MGMT_OP_EXEC + // ------------------ + // - We are processing a management operation for this hop + // - Launch the requested action be looking at the op_code + ST_MGMT_OP_EXEC: begin + if (i64_tvalid) begin + // Assume that the packet is getting routed normally + // unless some operation changes that + o64_tid <= CHDR_MGMT_ROUTE_EPID; + o64_tdest <= 10'd0; + case (op_code) + // Operation: Do nothing + CHDR_MGMT_OP_NOP: begin + // No-op. Jump to the finish state + pkt_state <= ST_MGMT_OP_DONE; + end + // Operation: Advertise this management packet to outside logic + CHDR_MGMT_OP_ADVERTISE: begin + // Pretty much a no-op. Jump to the finish state + pkt_state <= ST_MGMT_OP_DONE; + end + // Operation: Select a destination (tdest and tid) for the output CHDR stream + CHDR_MGMT_OP_SEL_DEST: begin + o64_tid <= CHDR_MGMT_ROUTE_TDEST; + o64_tdest <= chdr_mgmt_sel_dest_get_tdest(op_payload); + pkt_state <= ST_MGMT_OP_DONE; // Single cycle op + end + // Operation: Return the packet to source (turn it around) + CHDR_MGMT_OP_RETURN: begin + o64_tid <= CHDR_MGMT_RETURN_TO_SRC; + pkt_state <= ST_MGMT_OP_DONE; // Single cycle op + end + // Operation: Handle a node information request. + // Send the info as a response + CHDR_MGMT_OP_INFO_REQ: begin + pkt_state <= ST_MGMT_OP_DONE; // Single cycle op + end + // Operation: Handle a node information response. + // Treat as a no-op because this is a slave + CHDR_MGMT_OP_INFO_RESP: begin + pkt_state <= ST_MGMT_OP_DONE; + end + // Operation: Post a write on the outgoing control-port + CHDR_MGMT_OP_CFG_WR_REQ: begin + // ctrlport_req_* signals are assigned below + pkt_state <= ST_MGMT_OP_WAIT; // Wait until ACKed + end + // Operation: Post a read on the outgoing control-port + CHDR_MGMT_OP_CFG_RD_REQ: begin + // ctrlport_req_* signals are assigned below + pkt_state <= ST_MGMT_OP_WAIT; // Wait until ACKed + end + // Operation: Handle a read response. 
+ // Treat as a no-op because this is a slave + CHDR_MGMT_OP_CFG_RD_RESP: begin + pkt_state <= ST_MGMT_OP_DONE; + end + default: begin + // We should never get here + pkt_state <= ST_CHDR_IN_HDR; + end + endcase + end + end + + // ST_MGMT_OP_WAIT + // ------------------ + // - A management operation has started. We are waiting for it to finish + ST_MGMT_OP_WAIT: begin + if (i64_tvalid) begin + if (op_code == CHDR_MGMT_OP_CFG_WR_REQ || + op_code == CHDR_MGMT_OP_CFG_RD_REQ) begin + // Wait for a control-port transaction to finish + if (ctrlport_resp_ack) begin + pkt_state <= ST_MGMT_OP_DONE; + end + end else begin + // All other operations should not get here + pkt_state <= ST_MGMT_OP_DONE; + end + end + end + + // ST_MGMT_OP_DONE + // ------------------ + // - The management operation has finished + // - Consume a word on the input CHDR stream and update internal state + ST_MGMT_OP_DONE: begin + if (i64_tvalid && i64_tready) begin + if (!i64_tlast) begin + // We just consumed one CHDR word (CHDR_W_BYTES bytes) from the incoming packet + stripped_len <= stripped_len - CHDR_W_BYTES; + // Check if this was the last op for this hop. If so start + // to output a packet, otherwise start the next op. 
+ if (chdr_mgmt_get_ops_pending(i64_tdata) == 8'd0) begin + hops_remaining <= hops_remaining - 10'd1; + pkt_state <= ST_CHDR_OUT_HDR; + end else begin + pkt_state <= ST_MGMT_OP_EXEC; + end + end else begin + // Premature termination or this is the last operation + // Either way, move back to the beginning of the next pkt + pkt_state <= ST_CHDR_IN_HDR; + end + end + end + + // ST_CHDR_OUT_HDR + // ------------------ + // - We are outputting the CHDR header + ST_CHDR_OUT_HDR: begin + if (o64_tvalid && o64_tready) + pkt_state <= ST_MGMT_OUT_HDR; + end + + // ST_MGMT_OUT_HDR + // ------------------ + // - We are outputting the management header + ST_MGMT_OUT_HDR: begin + if (o64_tvalid && o64_tready) + if (resp_o_tvalid && (hops_remaining == 10'd1)) + pkt_state <= ST_MOD_LAST_HOP; // Special state to append responses to last hop + else + pkt_state <= ST_PASS_PAYLOAD; // Just pass the data as-is + end + + // ST_PASS_PAYLOAD + // ------------------ + // - We are passing the payload for the downstream hops as-is + ST_PASS_PAYLOAD: begin + if (o64_tvalid && o64_tready) begin + if (!i64_tlast) begin + // Check if this was the last op for this hop. If so update + // the hop count. If this is the last hop then enter the next + // state to process it. We might need to append responses for our + // management operations. + if (chdr_mgmt_get_ops_pending(i64_tdata) == 8'd0) begin + hops_remaining <= hops_remaining - 10'd1; + if (resp_o_tvalid && (hops_remaining == 10'd1)) + pkt_state <= ST_MOD_LAST_HOP; // Special state to append responses to last hop + else + pkt_state <= ST_PASS_PAYLOAD; // Just pass the data as-is + end else begin + pkt_state <= ST_PASS_PAYLOAD; + end + end else begin + pkt_state <= ST_CHDR_IN_HDR; + end + end + end + + // ST_MOD_LAST_HOP + // ------------------ + // - We are processing the last hop. We need a special state because we + // need to update the "ops_pending" field if we have responses to tack + // on to the end of the hop. 
+ // - We continue to pass the input to the output while modifying ops_pending + // - For the last op, we move to the POP_RESPONSE state if we need to add responses + ST_MOD_LAST_HOP: begin + if (o64_tvalid && o64_tready) begin + // Check if this was the last op for this hop. + if (chdr_mgmt_get_ops_pending(i64_tdata) == 8'd0) begin + if (resp_o_tvalid) + pkt_state <= ST_POP_RESPONSE; // We have pending responses + else + pkt_state <= i64_tlast ? ST_CHDR_IN_HDR : ST_FAILSAFE_DROP; + end + end + end + + // ST_POP_RESPONSE + // ------------------ + // - Pop a response word from the FIFO + ST_POP_RESPONSE: begin + if (resp_o_tvalid) begin + resp_op_code <= resp_o_tdata[7:0]; + resp_op_payload <= resp_o_tdata[55:8]; + pkt_state <= ST_APPEND_LAST_HOP; + end + end + + // ST_APPEND_LAST_HOP + // ------------------ + // - Append the popped response to the output packet here + // - Keep doing so until the response FIFO is empty + ST_APPEND_LAST_HOP: begin + if (o64_tvalid && o64_tready) + pkt_state <= resp_o_tvalid ? ST_POP_RESPONSE : ST_CHDR_IN_HDR; + end + + // ST_FAILSAFE_DROP + // ------------------ + // - Something went wrong. Discard the packet and re-arm the state machine + ST_FAILSAFE_DROP: begin + if (i64_tvalid && i64_tready) + pkt_state <= i64_tlast ? ST_CHDR_IN_HDR : ST_FAILSAFE_DROP; + end + + default: begin + // We should never get here + pkt_state <= ST_CHDR_IN_HDR; + end + endcase + end + end + + // Logic to determine when to consume a word from the input CHDR stream + always @(*) begin + case (pkt_state) + ST_CHDR_IN_HDR: + i64_tready = 1'b1; // Unconditionally consume header + ST_CHDR_IN_MDATA: + i64_tready = 1'b1; // Unconditionally discard metadata + ST_MGMT_IN_HDR: + i64_tready = 1'b1; // Unconditionally consume header + ST_MGMT_OP_DONE: + i64_tready = 1'b1; // Operation is done. 
Consume op-word + ST_PASS_PAYLOAD: + i64_tready = o64_tready; // We are passing input -> output + ST_MOD_LAST_HOP: + i64_tready = o64_tready; // We are passing input -> output + ST_FAILSAFE_DROP: + i64_tready = 1'b1; // Unconditionally consume to drop + default: + i64_tready = 1'b0; // Hold the input. We are processing it + endcase + end + + // Swap src/dst EPIDs if returning packet to source + wire [15:0] o64_dst_epid = (o64_tid == CHDR_MGMT_RETURN_TO_SRC) ? + chdr_mgmt_get_src_epid(cached_mgmt_hdr) : chdr_get_dst_epid(cached_chdr_hdr); + wire [15:0] o64_src_epid = (o64_tid == CHDR_MGMT_RETURN_TO_SRC) ? + chdr_get_dst_epid(cached_chdr_hdr) : chdr_mgmt_get_src_epid(cached_mgmt_hdr); + + // Logic to drive the output CHDR stream + always @(*) begin + case (pkt_state) + ST_CHDR_OUT_HDR: begin + // We are generating new data using cached values. + // Output header = Input header with new length + o64_tdata = chdr_set_length( + chdr_set_dst_epid(cached_chdr_hdr, o64_dst_epid), + (stripped_len + (num_resp_pending << LOG2_CHDR_W_BYTES))); + o64_tvalid = 1'b1; + o64_tlast = 1'b0; + end + ST_MGMT_OUT_HDR: begin + // We are generating new data using cached values. + // Output header = Input header with new num_hops and some protocol info + o64_tdata = chdr_mgmt_build_hdr(PROTOVER, chdr_w_to_enum(CHDR_W), + chdr_mgmt_get_num_hops(cached_mgmt_hdr) - 10'd1, o64_src_epid); + o64_tvalid = 1'b1; + o64_tlast = 1'b0; + end + ST_PASS_PAYLOAD: begin + // Input -> Output without modification + o64_tdata = i64_tdata; + o64_tvalid = i64_tvalid; + o64_tlast = i64_tlast; + end + ST_MOD_LAST_HOP: begin + // Input -> Output but update the ops_pending field + o64_tdata = chdr_mgmt_build_op(chdr_mgmt_get_op_payload(i64_tdata), + chdr_mgmt_get_op_code(i64_tdata), + chdr_mgmt_get_ops_pending(i64_tdata) + num_resp_pending); + o64_tvalid = i64_tvalid; + o64_tlast = i64_tlast && !resp_o_tvalid; + end + ST_APPEND_LAST_HOP: begin + // We are generating new data using cached values. 
+ o64_tdata = chdr_mgmt_build_op(resp_op_payload, resp_op_code, num_resp_pending); + o64_tvalid = 1'b1; + o64_tlast = !resp_o_tvalid; + end + default: begin + // We are processing something. Don't output + o64_tdata = 64'h0; + o64_tvalid = 1'b0; + o64_tlast = 1'b0; + end + endcase + end + + // CHDR_MGMT_OP_ADVERTISE + // ---------------------- + assign op_stb = i64_tvalid && (pkt_state == ST_MGMT_OP_DONE) && + (op_code == CHDR_MGMT_OP_ADVERTISE); + assign op_dst_epid = chdr_get_dst_epid(cached_chdr_hdr); + assign op_src_epid = chdr_mgmt_get_src_epid(cached_mgmt_hdr); + assign op_data = cached_tuser; + + // CHDR_MGMT_OP_CFG_WR_REQ + // CHDR_MGMT_OP_CFG_RD_REQ + // ----------------------- + // The request is sent out in the ST_MGMT_OP_EXEC state and we wait for a response + // in the ST_MGMT_OP_WAIT state + always @(posedge clk) begin + if (rst) begin + ctrlport_req_wr <= 1'b0; + ctrlport_req_rd <= 1'b0; + end else begin + ctrlport_req_wr <= i64_tvalid && (pkt_state == ST_MGMT_OP_EXEC) && + (op_code == CHDR_MGMT_OP_CFG_WR_REQ); + ctrlport_req_rd <= i64_tvalid && (pkt_state == ST_MGMT_OP_EXEC) && + (op_code == CHDR_MGMT_OP_CFG_RD_REQ); + ctrlport_req_addr <= chdr_mgmt_cfg_reg_get_addr(op_payload); + ctrlport_req_data <= chdr_mgmt_cfg_reg_get_data(op_payload); + end + end + + // CHDR_MGMT_OP_CFG_RD_REQ + // CHDR_MGMT_OP_INFO_REQ + // ----------------------- + // Collect the response for these operations and push to the response FIFO + assign resp_i_tvalid = i64_tvalid && ( + ((pkt_state == ST_MGMT_OP_WAIT) && (op_code == CHDR_MGMT_OP_CFG_RD_REQ) && ctrlport_resp_ack) || + ((pkt_state == ST_MGMT_OP_DONE) && (op_code == CHDR_MGMT_OP_INFO_REQ))); + assign resp_i_tdata = (op_code == CHDR_MGMT_OP_CFG_RD_REQ) ? 
+ {ctrlport_resp_data, ctrlport_req_addr, CHDR_MGMT_OP_CFG_RD_RESP} : // Ctrlport response + {node_info, CHDR_MGMT_OP_INFO_RESP}; // NodeInfo + + // The response FIFO should be deep enough to store all the responses + wire [15:0] resp_fifo_occ; + axi_fifo #(.WIDTH(56), .SIZE(RESP_FIFO_SIZE)) resp_fifo_i ( + .clk(clk), .reset(rst), .clear(pkt_state == ST_CHDR_IN_HDR), + .i_tdata(resp_i_tdata), .i_tvalid(resp_i_tvalid), + .i_tready(/* Must be high. Responses will be dropped if FIFO is full */), + .o_tdata(resp_o_tdata), .o_tvalid(resp_o_tvalid), + .o_tready(resp_o_tvalid && (pkt_state == ST_POP_RESPONSE)), + .space(), .occupied(resp_fifo_occ) + ); + assign num_resp_pending = resp_fifo_occ[7:0]; + +endmodule // chdr_mgmt_pkt_handler diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_stream_endpoint.v b/fpga/usrp3/lib/rfnoc/core/chdr_stream_endpoint.v new file mode 100644 index 000000000..9c824a0af --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_stream_endpoint.v @@ -0,0 +1,621 @@
//
// Copyright 2018-2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_stream_endpoint
// Description:
//   The implementation of a stream endpoint. This module serves as
//   an endpoint for a bidirectional stream. It implements a control
//   and a data path, both of which can be individually enabled using
//   parameters. The control path contains a bidirectional CHDR to
//   AXIS-Control converter. The data path has a stream input and
//   output port.
//
//   Incoming CHDR packets are demultiplexed by packet type (control,
//   data/stream-command, stream-status, management) and the four
//   return paths are multiplexed back onto the single CHDR output.
//   Management packets are terminated here and give access to the
//   register map documented below.
//
// Parameters:
//   - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>}
//   - CHDR_W: Width of the CHDR bus in bits
//   - INST_NUM: The instance number of this module
//   - CTRL_XBAR_PORT: The port index on the control crossbar that
//                     this module's control path will connect to
//   - AXIS_CTRL_EN: Enable control traffic (axis_ctrl port)
//   - AXIS_DATA_EN: Enable data traffic (axis_data port)
//   - NUM_DATA_I: Number of AXIS data slave ports
//   - NUM_DATA_O: Number of AXIS data master ports
//   - INGRESS_BUFF_SIZE: Buffer size in log2 of the number of words
//                        in the ingress buffer for the stream
//   - MTU: Log2 of the maximum packet size in words
//   - REPORT_STRM_ERRS: Report data stream errors upstream
//   - SIM_SPEEDUP: Set to 1 in simulation, and 0 otherwise
//
// Signals:
//   - device_id     : The ID of the device that has instantiated this module
//   - *_axis_chdr_* : Input/output CHDR stream (AXI-Stream)
//   - *_axis_ctrl_* : Input/output AXIS-Control streams (AXI-Stream)
//   - *_axis_data_* : Input/output CHDR Data streams (AXI-Stream)
//   - strm_*_err_stb: The stream encountered an error
//   - signal_*_err  : Notify upstream that we encountered an error

module chdr_stream_endpoint #(
  parameter [15:0] PROTOVER          = {8'd1, 8'd0},
  parameter        CHDR_W            = 64,
  parameter [9:0]  INST_NUM          = 0,
  parameter [9:0]  CTRL_XBAR_PORT    = 0,
  parameter [0:0]  AXIS_CTRL_EN      = 1,
  parameter [0:0]  AXIS_DATA_EN      = 1,
  parameter [5:0]  NUM_DATA_I        = 1,
  parameter [5:0]  NUM_DATA_O        = 1,
  parameter [5:0]  INGRESS_BUFF_SIZE = 12,
  parameter [5:0]  MTU               = 10,
  parameter [0:0]  REPORT_STRM_ERRS  = 1,
  parameter [0:0]  SIM_SPEEDUP       = 0
)(
  // Clock, reset and settings
  input  wire                           rfnoc_chdr_clk,
  input  wire                           rfnoc_chdr_rst,
  input  wire                           rfnoc_ctrl_clk,
  input  wire                           rfnoc_ctrl_rst,
  // Device info
  input  wire [15:0]                    device_id,
  // CHDR in (AXI-Stream)
  input  wire [CHDR_W-1:0]              s_axis_chdr_tdata,
  input  wire                           s_axis_chdr_tlast,
  input  wire                           s_axis_chdr_tvalid,
  output wire                           s_axis_chdr_tready,
  // CHDR out (AXI-Stream)
  output wire [CHDR_W-1:0]              m_axis_chdr_tdata,
  output wire                           m_axis_chdr_tlast,
  output wire                           m_axis_chdr_tvalid,
  input  wire                           m_axis_chdr_tready,
  // Flow controlled data in (AXI-Stream)
  input  wire [(CHDR_W*NUM_DATA_I)-1:0] s_axis_data_tdata,
  input  wire [NUM_DATA_I-1:0]          s_axis_data_tlast,
  input  wire [NUM_DATA_I-1:0]          s_axis_data_tvalid,
  output wire [NUM_DATA_I-1:0]          s_axis_data_tready,
  // Flow controlled data out (AXI-Stream)
  output wire [(CHDR_W*NUM_DATA_O)-1:0] m_axis_data_tdata,
  output wire [NUM_DATA_O-1:0]          m_axis_data_tlast,
  output wire [NUM_DATA_O-1:0]          m_axis_data_tvalid,
  input  wire [NUM_DATA_O-1:0]          m_axis_data_tready,
  // Control in (AXI-Stream)
  input  wire [31:0]                    s_axis_ctrl_tdata,
  input  wire                           s_axis_ctrl_tlast,
  input  wire                           s_axis_ctrl_tvalid,
  output wire                           s_axis_ctrl_tready,
  // Control out (AXI-Stream)
  output wire [31:0]                    m_axis_ctrl_tdata,
  output wire                           m_axis_ctrl_tlast,
  output wire                           m_axis_ctrl_tvalid,
  input  wire                           m_axis_ctrl_tready,
  // Stream status specific
  output wire                           strm_seq_err_stb,
  output wire                           strm_data_err_stb,
  output wire                           strm_route_err_stb,
  input  wire                           signal_data_err
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_chdr_internal_utils.vh"

  // ---------------------------------------------------
  //  Filter packets by type
  // ---------------------------------------------------
  // Naming: *_i_* is traffic coming out of the input demux (towards the
  // handlers), *_o_* is traffic going into the output mux.
  wire [CHDR_W-1:0] ctrl_i_tdata,  ctrl_o_tdata;
  wire              ctrl_i_tlast,  ctrl_o_tlast;
  wire              ctrl_i_tvalid, ctrl_o_tvalid;
  wire              ctrl_i_tready, ctrl_o_tready;

  wire [CHDR_W-1:0] data_i_tdata,  data_o_tdata;
  wire              data_i_tlast,  data_o_tlast;
  wire              data_i_tvalid, data_o_tvalid;
  wire              data_i_tready, data_o_tready;

  wire [CHDR_W-1:0] strs_i_tdata,  strs_o_tdata;
  wire              strs_i_tlast,  strs_o_tlast;
  wire              strs_i_tvalid, strs_o_tvalid;
  wire              strs_i_tready, strs_o_tready;

  wire [CHDR_W-1:0] mgmt_i_tdata,  mgmt_o_tdata;
  wire              mgmt_i_tlast,  mgmt_o_tlast;
  wire              mgmt_i_tvalid, mgmt_o_tvalid;
  wire              mgmt_i_tready, mgmt_o_tready;

  // Map the packet type in a CHDR header to a demux output port.
  // The port numbering must match the concatenation order used on the
  // o_* ports of mgmt_demux_i: {3: mgmt, 2: ctrl, 1: data, 0: strs}.
  function [1:0] compute_demux_dest;
    input [63:0] hdr;
    if (chdr_get_pkt_type(hdr) == CHDR_PKT_TYPE_CTRL)
      // Control
      compute_demux_dest = 2'd2;
    else if (chdr_get_pkt_type(hdr) == CHDR_PKT_TYPE_STRC ||
             chdr_get_pkt_type(hdr) == CHDR_PKT_TYPE_DATA ||
             chdr_get_pkt_type(hdr) == CHDR_PKT_TYPE_DATA_TS)
      // Data and stream command
      compute_demux_dest = 2'd1;
    else if (chdr_get_pkt_type(hdr) == CHDR_PKT_TYPE_STRS)
      // Stream status
      compute_demux_dest = 2'd0;
    else
      // Management (all packets must return to sender)
      compute_demux_dest = 2'd3;
  endfunction

  // We give the demux a FIFO large enough to buffer short packets
  // Flow control will ensure that data does not back up through
  // this demux but we might have the other packet types block
  // each other.
  localparam DEMUX_FIFO_SIZE = 5;

  wire [CHDR_W-1:0] chdr_header;
  axi_demux #(
    .WIDTH(CHDR_W), .SIZE(4), .PRE_FIFO_SIZE(DEMUX_FIFO_SIZE), .POST_FIFO_SIZE(1)
  ) mgmt_demux_i (
    .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst), .clear(1'b0),
    .header(chdr_header), .dest(compute_demux_dest(chdr_header[63:0])),
    .i_tdata (s_axis_chdr_tdata ),
    .i_tlast (s_axis_chdr_tlast ),
    .i_tvalid(s_axis_chdr_tvalid),
    .i_tready(s_axis_chdr_tready),
    .o_tdata ({mgmt_i_tdata,  ctrl_i_tdata,  data_i_tdata,  strs_i_tdata }),
    .o_tlast ({mgmt_i_tlast,  ctrl_i_tlast,  data_i_tlast,  strs_i_tlast }),
    .o_tvalid({mgmt_i_tvalid, ctrl_i_tvalid, data_i_tvalid, strs_i_tvalid}),
    .o_tready({mgmt_i_tready, ctrl_i_tready, data_i_tready, strs_i_tready})
  );

  // Note that the input ordering of this mux intentionally differs from
  // the output ordering of the demux above.
  axi_mux #(
    .WIDTH(CHDR_W), .SIZE(4), .PRIO(1), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) mgmt_mux_i (
    .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst), .clear(1'b0),
    .i_tdata ({mgmt_o_tdata,  data_o_tdata,  strs_o_tdata,  ctrl_o_tdata }),
    .i_tlast ({mgmt_o_tlast,  data_o_tlast,  strs_o_tlast,  ctrl_o_tlast }),
    .i_tvalid({mgmt_o_tvalid, data_o_tvalid, strs_o_tvalid, ctrl_o_tvalid}),
    .i_tready({mgmt_o_tready, data_o_tready, strs_o_tready, ctrl_o_tready}),
    .o_tdata (m_axis_chdr_tdata ),
    .o_tlast (m_axis_chdr_tlast ),
    .o_tvalid(m_axis_chdr_tvalid),
    .o_tready(m_axis_chdr_tready)
  );

  // ---------------------------------------------------
  //  Management Path
  // ---------------------------------------------------
  wire        ctrlport_req_wr, ctrlport_req_rd;
  reg         ctrlport_resp_ack = 1'b0;
  wire [15:0] ctrlport_req_addr;
  wire [31:0] ctrlport_req_data;
  reg  [31:0] ctrlport_resp_data;

  // Capabilities of this endpoint, reported as part of the node info
  localparam [17:0] EXTENDED_INFO = {
    3'b0, REPORT_STRM_ERRS, NUM_DATA_O, NUM_DATA_I, AXIS_DATA_EN, AXIS_CTRL_EN};

  // Handle management packets here
  chdr_mgmt_pkt_handler #(
    .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .MGMT_ONLY(1)
  ) mgmt_ep_i (
    .clk(rfnoc_chdr_clk), .rst(rfnoc_chdr_rst),
    .node_info(chdr_mgmt_build_node_info(EXTENDED_INFO, INST_NUM, NODE_TYPE_STREAM_EP, device_id)),
    .s_axis_chdr_tdata(mgmt_i_tdata), .s_axis_chdr_tlast(mgmt_i_tlast),
    .s_axis_chdr_tvalid(mgmt_i_tvalid), .s_axis_chdr_tready(mgmt_i_tready),
    .s_axis_chdr_tuser('d0),
    .m_axis_chdr_tdata(mgmt_o_tdata), .m_axis_chdr_tlast(mgmt_o_tlast),
    .m_axis_chdr_tdest(/* unused */), .m_axis_chdr_tid(/* unused */),
    .m_axis_chdr_tvalid(mgmt_o_tvalid), .m_axis_chdr_tready(mgmt_o_tready),
    .ctrlport_req_wr(ctrlport_req_wr), .ctrlport_req_rd(ctrlport_req_rd),
    .ctrlport_req_addr(ctrlport_req_addr), .ctrlport_req_data(ctrlport_req_data),
    .ctrlport_resp_ack(ctrlport_resp_ack), .ctrlport_resp_data(ctrlport_resp_data),
    .op_stb(/* unused */), .op_dst_epid(/* unused */), .op_src_epid(/* unused */),
    .op_data(/* unused */)
  );

  // ============================== REGISTERS ==============================
  // * REG_EPID_SELF (Read-Write):
  //   The endpoint ID of this stream endpoint
  //   - [15:0]: Endpoint ID
  // * REG_RESET_AND_FLUSH (Write-Only):
  //   Reset and flush register (all bits are self-clearing strobes)
  //   - [0]: Reset the output stream (data => CHDR) path
  //   - [1]: Reset the input stream (CHDR => data) path
  //   - [2]: Reset the control path
  // * REG_OSTRM_CTRL_STATUS (Read-Write):
  //   Control and status register for the output stream
  //   - [0]  : Configuration start (strobe, reads back as 0)
  //   - [1]  : Is this transport lossy?
  //   - [3:2]: Payload SW buff (0=u64, 1=u32, 2=u16, 3=u8)
  //   - [5:4]: Metadata SW buff (0=u64, 1=u32, 2=u16, 3=u8)
  //   - [6]  : Swap endianness
  //   - [29] : Configuration pending (Read-Only)
  //   - [30] : Configuration failed (Read-Only)
  //   - [31] : Flow control enabled (Read-Only)
  // * REG_OSTRM_DST_EPID (Write-Only):
  //   The endpoint ID of a downstream stream endpoint
  //   - [15:0]: Endpoint ID
  // * REG_OSTRM_FC_FREQ_BYTES_LO, REG_OSTRM_FC_FREQ_BYTES_HI (Write-Only):
  //   Number of bytes between flow control status messages
  // * REG_OSTRM_FC_FREQ_PKTS (Write-Only):
  //   Number of packets between flow control status messages
  // * REG_OSTRM_FC_HEADROOM (Write-Only):
  //   Flow control headroom register
  //   - [15:0]: Bytes of headroom
  //   - [23:16]: Packets of headroom
  // * REG_OSTRM_BUFF_CAP_BYTES_LO, REG_OSTRM_BUFF_CAP_BYTES_HI (Read-Only):
  //   Number of bytes in the downstream buffer
  // * REG_OSTRM_BUFF_CAP_PKTS (Read-Only):
  //   Number of packets in the downstream buffer
  // * REG_OSTRM_SEQ_ERR_CNT (Read-Only):
  //   Number of sequence errors since initialization
  // * REG_OSTRM_DATA_ERR_CNT (Read-Only):
  //   Number of data integrity errors since initialization
  // * REG_OSTRM_ROUTE_ERR_CNT (Read-Only):
  //   Number of routing errors since initialization
  // * REG_ISTRM_CTRL_STATUS (Read-Write):
  //   Control and status register for the input stream
  //   - [0]  : Reserved
  //   - [1]  : Reserved
  //   - [3:2]: Payload SW buff (0=u64, 1=u32, 2=u16, 3=u8)
  //   - [5:4]: Metadata SW buff (0=u64, 1=u32, 2=u16, 3=u8)
  //   - [6]  : Swap endianness
  // =======================================================================

  localparam [15:0] REG_EPID_SELF               = 16'h00;  //RW
  localparam [15:0] REG_RESET_AND_FLUSH         = 16'h04;  //W
  localparam [15:0] REG_OSTRM_CTRL_STATUS       = 16'h08;  //RW
  localparam [15:0] REG_OSTRM_DST_EPID          = 16'h0C;  //W
  localparam [15:0] REG_OSTRM_FC_FREQ_BYTES_LO  = 16'h10;  //W
  localparam [15:0] REG_OSTRM_FC_FREQ_BYTES_HI  = 16'h14;  //W
  localparam [15:0] REG_OSTRM_FC_FREQ_PKTS      = 16'h18;  //W
  localparam [15:0] REG_OSTRM_FC_HEADROOM       = 16'h1C;  //W
  localparam [15:0] REG_OSTRM_BUFF_CAP_BYTES_LO = 16'h20;  //R
  localparam [15:0] REG_OSTRM_BUFF_CAP_BYTES_HI = 16'h24;  //R
  localparam [15:0] REG_OSTRM_BUFF_CAP_PKTS     = 16'h28;  //R
  localparam [15:0] REG_OSTRM_SEQ_ERR_CNT       = 16'h2C;  //R
  localparam [15:0] REG_OSTRM_DATA_ERR_CNT      = 16'h30;  //R
  localparam [15:0] REG_OSTRM_ROUTE_ERR_CNT     = 16'h34;  //R
  localparam [15:0] REG_ISTRM_CTRL_STATUS       = 16'h38;  //RW

  // Configurable registers
  reg  [15:0] reg_epid_self               = 16'h0;
  reg         reg_ctrl_reset              = 1'b0;
  reg         reg_istrm_reset             = 1'b0;
  reg         reg_ostrm_reset             = 1'b0;
  reg         reg_ostrm_cfg_start         = 1'b0;
  wire        reg_ostrm_cfg_pending;
  wire        reg_ostrm_cfg_failed;
  reg         reg_ostrm_cfg_lossy_xport   = 1'b0;
  reg  [1:0]  reg_ostrm_cfg_pyld_sw_buff  = 2'd0;
  reg  [1:0]  reg_ostrm_cfg_mdata_sw_buff = 2'd0;
  reg         reg_ostrm_cfg_swap_endian   = 1'b0;
  reg  [15:0] reg_ostrm_dst_epid          = 16'h0;
  reg  [39:0] reg_fc_freq_bytes           = 40'h0;
  reg  [23:0] reg_fc_freq_pkts            = 24'h0;
  reg  [15:0] reg_fc_headroom_bytes       = 16'd0;
  reg  [7:0]  reg_fc_headroom_pkts        = 8'd0;
  reg  [1:0]  reg_istrm_cfg_pyld_sw_buff  = 2'd0;
  reg  [1:0]  reg_istrm_cfg_mdata_sw_buff = 2'd0;
  reg         reg_istrm_cfg_swap_endian   = 1'b0;
  wire        reg_fc_enabled;
  wire [39:0] reg_buff_cap_bytes;
  wire [23:0] reg_buff_cap_pkts;
  wire [31:0] reg_seq_err_cnt;
  wire [31:0] reg_data_err_cnt;
  wire [31:0] reg_route_err_cnt;

  always @(posedge rfnoc_chdr_clk) begin
    if (rfnoc_chdr_rst) begin
      ctrlport_resp_ack <= 1'b0;
    end else begin
      // All transactions finish in 1 cycle
      ctrlport_resp_ack <= ctrlport_req_wr | ctrlport_req_rd;
      // Handle register writes
      if (ctrlport_req_wr) begin
        case(ctrlport_req_addr)
          REG_EPID_SELF:
            reg_epid_self <= ctrlport_req_data[15:0];
          REG_RESET_AND_FLUSH:
            {reg_ctrl_reset, reg_istrm_reset, reg_ostrm_reset} <= ctrlport_req_data[2:0];
          REG_OSTRM_CTRL_STATUS:
            {reg_ostrm_cfg_swap_endian, reg_ostrm_cfg_mdata_sw_buff, reg_ostrm_cfg_pyld_sw_buff,
             reg_ostrm_cfg_lossy_xport, reg_ostrm_cfg_start} <= ctrlport_req_data[6:0];
          REG_OSTRM_DST_EPID:
            reg_ostrm_dst_epid <= ctrlport_req_data[15:0];
          REG_OSTRM_FC_FREQ_BYTES_LO:
            reg_fc_freq_bytes[31:0] <= ctrlport_req_data[31:0];
          REG_OSTRM_FC_FREQ_BYTES_HI:
            reg_fc_freq_bytes[39:32] <= ctrlport_req_data[7:0];
          REG_OSTRM_FC_FREQ_PKTS:
            reg_fc_freq_pkts <= ctrlport_req_data[23:0];
          REG_OSTRM_FC_HEADROOM:
            {reg_fc_headroom_pkts, reg_fc_headroom_bytes} <= ctrlport_req_data[23:0];
          REG_ISTRM_CTRL_STATUS:
            {reg_istrm_cfg_swap_endian, reg_istrm_cfg_mdata_sw_buff, reg_istrm_cfg_pyld_sw_buff}
              <= ctrlport_req_data[6:2];
        endcase
      end else begin
        // Strobed registers
        reg_ostrm_cfg_start <= 1'b0;
        reg_ctrl_reset      <= 1'b0;
        reg_ostrm_reset     <= 1'b0;
        reg_istrm_reset     <= 1'b0;
      end
      // Handle register reads
      if (ctrlport_req_rd) begin
        case(ctrlport_req_addr)
          REG_EPID_SELF:
            ctrlport_resp_data <= {16'h0, reg_epid_self};
          REG_OSTRM_CTRL_STATUS:
            // Bit [6] (swap endianness) is writable, so it must also be
            // readable. Bit [0] (cfg_start) is a strobe and reads as 0.
            ctrlport_resp_data <= {
              reg_fc_enabled, reg_ostrm_cfg_failed, reg_ostrm_cfg_pending, 22'h0,
              reg_ostrm_cfg_swap_endian,
              reg_ostrm_cfg_mdata_sw_buff, reg_ostrm_cfg_pyld_sw_buff,
              reg_ostrm_cfg_lossy_xport, 1'b0};
          REG_OSTRM_BUFF_CAP_BYTES_LO:
            ctrlport_resp_data <= reg_buff_cap_bytes[31:0];
          REG_OSTRM_BUFF_CAP_BYTES_HI:
            ctrlport_resp_data <= {24'h0, reg_buff_cap_bytes[39:32]};
          REG_OSTRM_BUFF_CAP_PKTS:
            ctrlport_resp_data <= {8'h0, reg_buff_cap_pkts};
          REG_OSTRM_SEQ_ERR_CNT:
            ctrlport_resp_data <= reg_seq_err_cnt;
          REG_OSTRM_DATA_ERR_CNT:
            ctrlport_resp_data <= reg_data_err_cnt;
          REG_OSTRM_ROUTE_ERR_CNT:
            ctrlport_resp_data <= reg_route_err_cnt;
          REG_ISTRM_CTRL_STATUS:
            // Mirror the write layout, including swap endianness in bit [6]
            ctrlport_resp_data <= {25'h0, reg_istrm_cfg_swap_endian,
              reg_istrm_cfg_mdata_sw_buff, reg_istrm_cfg_pyld_sw_buff, 2'b0};
          default:
            ctrlport_resp_data <= 32'h0;
        endcase
      end
    end
  end

  // ---------------------------------------------------
  //  Data and Flow Control Path
  // ---------------------------------------------------
  genvar i;
  generate if (AXIS_DATA_EN) begin: datapath
    localparam INPUT_FLUSH_TIMEOUT_W = SIM_SPEEDUP ? 6 : 14;

    // Data => CHDR
    //-------------
    wire [CHDR_W-1:0] axis_di_tdata,  axis_dis_tdata, axis_di_tdata_pre;
    wire [5:0]        axis_di_tdest;
    wire              axis_di_tlast,  axis_dis_tlast;
    wire              axis_di_tvalid, axis_dis_tvalid;
    wire              axis_di_tready, axis_dis_tready;

    // Optional MUX to combine multiple input data ports into a single one
    if (NUM_DATA_I == 6'd1) begin
      axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(1)) axis_s_reg_i (
        .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst | reg_ostrm_reset), .clear(1'b0),
        .i_tdata({s_axis_data_tlast, s_axis_data_tdata}),
        .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready),
        .o_tdata({axis_di_tlast, axis_di_tdata_pre}),
        .o_tvalid(axis_di_tvalid), .o_tready(axis_di_tready),
        .space(), .occupied()
      );
      assign axis_di_tdest = 6'd0;
    end else begin
      // Tag every word with the index of the port it arrived on so that
      // the port number can be recovered after the mux.
      wire [((CHDR_W+6)*NUM_DATA_I)-1:0] s_axis_data_tdata_tmp;
      for (i = 0; i < NUM_DATA_I; i=i+1) begin
        assign s_axis_data_tdata_tmp[(i*(CHDR_W+6))+:(CHDR_W+6)] = {i[5:0], s_axis_data_tdata[(i*CHDR_W)+:CHDR_W]};
      end

      axi_mux #(
        .WIDTH(CHDR_W+6), .SIZE(NUM_DATA_I), .PRIO(0), .PRE_FIFO_SIZE(1), .POST_FIFO_SIZE(1)
      ) axis_s_mux_i (
        .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst | reg_ostrm_reset), .clear(1'b0),
        .i_tdata(s_axis_data_tdata_tmp), .i_tlast(s_axis_data_tlast),
        .i_tvalid(s_axis_data_tvalid), .i_tready(s_axis_data_tready),
        .o_tdata({axis_di_tdest, axis_di_tdata_pre}), .o_tlast(axis_di_tlast),
        .o_tvalid(axis_di_tvalid), .o_tready(axis_di_tready)
      );
    end

    // Logic to correctly fill in the VC field in the CHDR header
    // axis_di_hdr is high while the next transferred word is a header,
    // i.e. after reset and after every word that carried tlast.
    reg axis_di_hdr = 1'b1;
    always @(posedge rfnoc_chdr_clk) begin
      if (rfnoc_chdr_rst | reg_ostrm_reset)
        axis_di_hdr <= 1'b1;
      else if (axis_di_tvalid && axis_di_tready)
        axis_di_hdr <= axis_di_tlast;
    end
    assign axis_di_tdata[63:0] = axis_di_hdr ? chdr_set_vc(axis_di_tdata_pre[63:0], axis_di_tdest) :
                                               axis_di_tdata_pre[63:0];
    if (CHDR_W > 64) begin
      assign axis_di_tdata[CHDR_W-1:64] = axis_di_tdata_pre[CHDR_W-1:64];
    end

    // Module to swap words in the payload and metadata depending on SW settings
    chdr_data_swapper #( .CHDR_W(CHDR_W)) di_swap_i (
      .clk            (rfnoc_chdr_clk),
      .rst            (rfnoc_chdr_rst | reg_ostrm_reset),
      .payload_sw_buff(reg_ostrm_cfg_pyld_sw_buff),
      .mdata_sw_buff  (reg_ostrm_cfg_mdata_sw_buff),
      .swap_endianness(reg_ostrm_cfg_swap_endian),
      .s_axis_tdata   (axis_di_tdata),
      .s_axis_tlast   (axis_di_tlast),
      .s_axis_tvalid  (axis_di_tvalid),
      .s_axis_tready  (axis_di_tready),
      .m_axis_tdata   (axis_dis_tdata),
      .m_axis_tlast   (axis_dis_tlast),
      .m_axis_tvalid  (axis_dis_tvalid),
      .m_axis_tready  (axis_dis_tready)
    );

    // Stream endpoint flow-control output module
    chdr_stream_output #(
      .CHDR_W(CHDR_W), .MTU(MTU)
    ) strm_output_i (
      .clk                  (rfnoc_chdr_clk),
      .rst                  (rfnoc_chdr_rst | reg_ostrm_reset),
      .m_axis_chdr_tdata    (data_o_tdata),
      .m_axis_chdr_tlast    (data_o_tlast),
      .m_axis_chdr_tvalid   (data_o_tvalid),
      .m_axis_chdr_tready   (data_o_tready),
      .s_axis_data_tdata    (axis_dis_tdata),
      .s_axis_data_tlast    (axis_dis_tlast),
      .s_axis_data_tvalid   (axis_dis_tvalid),
      .s_axis_data_tready   (axis_dis_tready),
      .s_axis_strs_tdata    (strs_i_tdata),
      .s_axis_strs_tlast    (strs_i_tlast),
      .s_axis_strs_tvalid   (strs_i_tvalid),
      .s_axis_strs_tready   (strs_i_tready),
      .cfg_start            (reg_ostrm_cfg_start),
      .cfg_pending          (reg_ostrm_cfg_pending),
      .cfg_failed           (reg_ostrm_cfg_failed),
      .cfg_lossy_xport      (reg_ostrm_cfg_lossy_xport),
      .cfg_dst_epid         (reg_ostrm_dst_epid),
      .cfg_this_epid        (reg_epid_self),
      .cfg_fc_freq_bytes    (reg_fc_freq_bytes),
      .cfg_fc_freq_pkts     (reg_fc_freq_pkts),
      .cfg_fc_headroom_bytes(reg_fc_headroom_bytes),
      .cfg_fc_headroom_pkts (reg_fc_headroom_pkts),
      .fc_enabled           (reg_fc_enabled),
      .capacity_bytes       (reg_buff_cap_bytes),
      .capacity_pkts        (reg_buff_cap_pkts),
      .seq_err_stb          (strm_seq_err_stb),
      .seq_err_cnt          (reg_seq_err_cnt),
      .data_err_stb         (strm_data_err_stb),
      .data_err_cnt         (reg_data_err_cnt),
      .route_err_stb        (strm_route_err_stb),
      .route_err_cnt        (reg_route_err_cnt)
    );

    // CHDR => Data
    //-------------
    wire [CHDR_W-1:0] axis_do_tdata,  axis_dos_tdata;
    wire              axis_do_tlast,  axis_dos_tlast;
    wire              axis_do_tvalid, axis_dos_tvalid;
    wire              axis_do_tready, axis_dos_tready;

    // Stream endpoint flow-control input module
    chdr_stream_input #(
      .CHDR_W(CHDR_W), .BUFF_SIZE(INGRESS_BUFF_SIZE),
      .FLUSH_TIMEOUT_W(INPUT_FLUSH_TIMEOUT_W),
      .MONITOR_EN(0), .SIGNAL_ERRS(REPORT_STRM_ERRS)
    ) strm_input_i (
      .clk               (rfnoc_chdr_clk),
      .rst               (rfnoc_chdr_rst | reg_istrm_reset),
      .s_axis_chdr_tdata (data_i_tdata),
      .s_axis_chdr_tlast (data_i_tlast),
      .s_axis_chdr_tvalid(data_i_tvalid),
      .s_axis_chdr_tready(data_i_tready),
      .m_axis_data_tdata (axis_do_tdata),
      .m_axis_data_tlast (axis_do_tlast),
      .m_axis_data_tvalid(axis_do_tvalid),
      .m_axis_data_tready(axis_do_tready),
      .m_axis_strs_tdata (strs_o_tdata),
      .m_axis_strs_tlast (strs_o_tlast),
      .m_axis_strs_tvalid(strs_o_tvalid),
      .m_axis_strs_tready(strs_o_tready),
      .data_err_stb      (signal_data_err)
    );

    // Module to swap words in the payload and metadata depending on SW settings
    chdr_data_swapper #( .CHDR_W(CHDR_W)) do_swap_i (
      .clk            (rfnoc_chdr_clk),
      .rst            (rfnoc_chdr_rst | reg_istrm_reset),
      .payload_sw_buff(reg_istrm_cfg_pyld_sw_buff),
      .mdata_sw_buff  (reg_istrm_cfg_mdata_sw_buff),
      .swap_endianness(reg_istrm_cfg_swap_endian),
      .s_axis_tdata   (axis_do_tdata),
      .s_axis_tlast   (axis_do_tlast),
      .s_axis_tvalid  (axis_do_tvalid),
      .s_axis_tready  (axis_do_tready),
      .m_axis_tdata   (axis_dos_tdata),
      .m_axis_tlast   (axis_dos_tlast),
      .m_axis_tvalid  (axis_dos_tvalid),
      .m_axis_tready  (axis_dos_tready)
    );

    // Optional DEMUX to split a single stream into multiple outputs
    // Packets with an invalid (out of bounds) VC go to port 0
    if (NUM_DATA_O == 6'd1) begin
      axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(1)) axis_m_reg_i (
        .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst | reg_istrm_reset), .clear(1'b0),
        .i_tdata({axis_dos_tlast, axis_dos_tdata}),
        .i_tvalid(axis_dos_tvalid), .i_tready(axis_dos_tready),
        .o_tdata({m_axis_data_tlast, m_axis_data_tdata}),
        .o_tvalid(m_axis_data_tvalid), .o_tready(m_axis_data_tready),
        .space(), .occupied()
      );
    end else begin
      wire [CHDR_W-1:0] data_header;
      wire [5:0]        data_vc = chdr_get_vc(data_header[63:0]);
      axi_demux #(
        .WIDTH(CHDR_W), .SIZE(NUM_DATA_O), .PRE_FIFO_SIZE(1), .POST_FIFO_SIZE(1)
      ) axis_m_demux_i (
        .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst | reg_istrm_reset), .clear(1'b0),
        .header(data_header),
        .dest((data_vc < NUM_DATA_O) ? data_vc[$clog2(NUM_DATA_O)-1:0] : {$clog2(NUM_DATA_O){1'b0}}),
        .i_tdata(axis_dos_tdata), .i_tlast(axis_dos_tlast),
        .i_tvalid(axis_dos_tvalid), .i_tready(axis_dos_tready),
        .o_tdata(m_axis_data_tdata), .o_tlast(m_axis_data_tlast),
        .o_tvalid(m_axis_data_tvalid), .o_tready(m_axis_data_tready)
      );
    end

  end else begin    // if (AXIS_DATA_EN)

    // Data path disabled: sink incoming data/stream-status CHDR packets
    // and never produce any.
    assign data_i_tready = 1'b1;
    assign data_o_tdata  = {CHDR_W{1'b0}};
    assign data_o_tlast  = 1'b0;
    assign data_o_tvalid = 1'b0;

    assign strs_i_tready = 1'b1;
    assign strs_o_tdata  = {CHDR_W{1'b0}};
    assign strs_o_tlast  = 1'b0;
    assign strs_o_tvalid = 1'b0;

    assign s_axis_data_tready = {NUM_DATA_I{1'b0}};
    assign m_axis_data_tdata  = {(CHDR_W*NUM_DATA_O){1'b0}};
    assign m_axis_data_tlast  = {NUM_DATA_O{1'b0}};
    assign m_axis_data_tvalid = {NUM_DATA_O{1'b0}};

    // The status nets and error strobes below are only driven by the
    // stream output module in the enabled branch. Tie them off here so
    // that register reads and the module outputs are never left floating.
    assign reg_ostrm_cfg_pending = 1'b0;
    assign reg_ostrm_cfg_failed  = 1'b0;
    assign reg_fc_enabled        = 1'b0;
    assign reg_buff_cap_bytes    = 40'h0;
    assign reg_buff_cap_pkts     = 24'h0;
    assign reg_seq_err_cnt       = 32'h0;
    assign reg_data_err_cnt      = 32'h0;
    assign reg_route_err_cnt     = 32'h0;
    assign strm_seq_err_stb      = 1'b0;
    assign strm_data_err_stb     = 1'b0;
    assign strm_route_err_stb    = 1'b0;

  end endgenerate

  // ---------------------------------------------------
  //  Control Path
  // ---------------------------------------------------
  generate if (AXIS_CTRL_EN) begin: ctrlpath

    // Convert from a CHDR control packet to an AXIS control packet
    chdr_to_axis_ctrl #(
      .CHDR_W(CHDR_W), .THIS_PORTID(CTRL_XBAR_PORT)
    ) chdr_ctrl_adapter_i (
      .rfnoc_chdr_clk     (rfnoc_chdr_clk),
      .rfnoc_chdr_rst     (rfnoc_chdr_rst | reg_ctrl_reset),
      .this_epid          (reg_epid_self),
      .s_rfnoc_chdr_tdata (ctrl_i_tdata),
      .s_rfnoc_chdr_tlast (ctrl_i_tlast),
      .s_rfnoc_chdr_tvalid(ctrl_i_tvalid),
      .s_rfnoc_chdr_tready(ctrl_i_tready),
      .m_rfnoc_chdr_tdata (ctrl_o_tdata),
      .m_rfnoc_chdr_tlast (ctrl_o_tlast),
      .m_rfnoc_chdr_tvalid(ctrl_o_tvalid),
      .m_rfnoc_chdr_tready(ctrl_o_tready),
      .rfnoc_ctrl_clk     (rfnoc_ctrl_clk),
      .rfnoc_ctrl_rst     (rfnoc_ctrl_rst),
      .s_rfnoc_ctrl_tdata (s_axis_ctrl_tdata),
      .s_rfnoc_ctrl_tlast (s_axis_ctrl_tlast),
      .s_rfnoc_ctrl_tvalid(s_axis_ctrl_tvalid),
      .s_rfnoc_ctrl_tready(s_axis_ctrl_tready),
      .m_rfnoc_ctrl_tdata (m_axis_ctrl_tdata),
      .m_rfnoc_ctrl_tlast (m_axis_ctrl_tlast),
      .m_rfnoc_ctrl_tvalid(m_axis_ctrl_tvalid),
      .m_rfnoc_ctrl_tready(m_axis_ctrl_tready)
    );

  end else begin    // if (AXIS_CTRL_EN)

    // Control path disabled: sink incoming control traffic on both sides
    // and never produce any.
    assign ctrl_i_tready = 1'b1;
    assign ctrl_o_tdata  = {CHDR_W{1'b0}};
    assign ctrl_o_tlast  = 1'b0;
    assign ctrl_o_tvalid = 1'b0;

    assign s_axis_ctrl_tready = 1'b1;
    assign m_axis_ctrl_tdata  = 32'h0;
    assign m_axis_ctrl_tlast  = 1'b0;
    assign m_axis_ctrl_tvalid = 1'b0;

  end endgenerate

endmodule // chdr_stream_endpoint

diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_stream_input.v b/fpga/usrp3/lib/rfnoc/core/chdr_stream_input.v new file mode 100644 index 000000000..2a8a9c628 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_stream_input.v @@ -0,0 +1,569 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_stream_input +// Description: +// Implements the CHDR input port for a stream endpoint. +// The module accepts stream command and data packets and +// emits stream status packets. Flow control and error state +// is communicated using stream status packets. 
There are no +// external config interfaces because all configuration is done +// using stream command packets. +// +// Parameters: +// - CHDR_W: Width of the CHDR bus in bits +// - BUFF_SIZE: Buffer size in log2 of the number of words in the +// ingress buffer for the stream +// - FLUSH_TIMEOUT_W: log2 of the number of cycles to wait in order +// to flush the input stream +// - SIGNAL_ERRS: If set to 1 then all stream errors will be notified +// upstream, otherwise ALL errors are ignored +// +// Signals: +// - s_axis_chdr_* : Input CHDR stream (AXI-Stream) +// - m_axis_chdr_* : Output flow-controlled CHDR stream (AXI-Stream) +// - m_axis_strs_* : Output stream status (AXI-Stream) +// - data_err_stb : If asserted, a data error notification is sent upstream +// + +module chdr_stream_input #( + parameter CHDR_W = 256, + parameter BUFF_SIZE = 14, + parameter FLUSH_TIMEOUT_W = 14, + parameter MONITOR_EN = 1, + parameter SIGNAL_ERRS = 1 +)( + // Clock, reset and settings + input wire clk, + input wire rst, + // CHDR in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_chdr_tdata, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // Flow controlled data out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_data_tdata, + output wire m_axis_data_tlast, + output wire m_axis_data_tvalid, + input wire m_axis_data_tready, + // Stream status out (AXI-Stream) + output reg [CHDR_W-1:0] m_axis_strs_tdata, + output wire m_axis_strs_tlast, + output wire m_axis_strs_tvalid, + input wire m_axis_strs_tready, + // External stream error signal + input wire data_err_stb +); + + // The buffer size depends on the BUFF_SIZE parameter + localparam [40:0] BUFF_SIZE_BYTES = ((41'h1 << BUFF_SIZE) * (CHDR_W / 8)) - 41'h1; + // This is a flit-buffer. 
No packet limits + localparam [23:0] BUFF_SIZE_PKTS = 24'hFFFFFF; + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_chdr_internal_utils.vh" + + // --------------------------------------------------- + // Ingress Buffer and Flow Control Logic + // --------------------------------------------------- + wire [CHDR_W-1:0] buff_tdata; + wire buff_tlast, buff_tvalid; + reg buff_tready; + wire [15:0] buff_info; + + chdr_ingress_fifo #( + .WIDTH(CHDR_W), .SIZE(BUFF_SIZE) + ) ingress_fifo_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata(s_axis_chdr_tdata), .i_tlast(s_axis_chdr_tlast), + .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready), + .o_tdata(buff_tdata), .o_tlast(buff_tlast), + .o_tvalid(buff_tvalid), .o_tready(buff_tready) + ); + + generate if (MONITOR_EN) begin + wire [BUFF_SIZE:0] occ_lines; + axis_fifo_monitor #( .COUNT_W(BUFF_SIZE+1) ) fifo_mon_i ( + .clk(clk), .reset(rst), + .i_tlast(s_axis_chdr_tlast), .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready), + .o_tlast(buff_tlast), .o_tvalid(buff_tvalid), .o_tready(buff_tready), + .i_sop(), .i_eop(), .o_sop(), .o_eop(), + .occupied(occ_lines), .occupied_pkts() + ); + // buff_info represents a fraction of the fullness of the buffer + // fullness percentage = (buff_info / 32768) * 100 + if (BUFF_SIZE + 1 >= 16) + assign buff_info = occ_lines[BUFF_SIZE:(BUFF_SIZE-15)]; + else + assign buff_info = {occ_lines, {(15-BUFF_SIZE){1'b0}}}; + end else begin + assign buff_info = 16'd0; + end endgenerate + + // Flow Control State + // xfer_cnt: Total transfer count since fc_enabled = 1 + // accum: Transfer count since last FC response + // fc_freq: The threshold for sending an FC response + reg [63:0] xfer_cnt_bytes = 64'd0; + reg [39:0] xfer_cnt_pkts = 40'd0; + reg [63:0] accum_bytes = 64'd0; + reg [39:0] accum_pkts = 40'd0; + reg [63:0] fc_freq_bytes = 64'd0; + reg 
[39:0] fc_freq_pkts = 40'd0; + + // State machine transition signals info + reg fc_enabled = 1'b0; // Is flow control enabled? + wire fc_ping; // A flow control response was requested + wire fc_first_resp; // Send the first flow control response + wire fc_refresh; // Refresh accumulated values + wire fc_override; // Override total xfer counts + reg fc_override_del = 1'b0; + reg [3:0] fc_due_shreg = 4'h0; // Is a response due? (shift register) + + // Endpoint IDs of this endpoint and the stream source + reg [15:0] this_epid = 16'd0, return_epid = 16'd0; + + // Cached values from a stream command + reg [63:0] strc_num_bytes; + reg [39:0] strc_num_pkts; + reg [3:0] strc_op_data; // Unused for now + reg [3:0] strc_op_code; + + // Total transfer count updater + always @(posedge clk) begin + if (rst || !fc_enabled) begin + // Reset + xfer_cnt_bytes <= 64'd0; + xfer_cnt_pkts <= 40'd0; + end else if (fc_override) begin + // Override + xfer_cnt_bytes <= strc_num_bytes; + xfer_cnt_pkts <= strc_num_pkts; + end else if (buff_tvalid && buff_tready) begin + // Count + xfer_cnt_bytes <= xfer_cnt_bytes + (CHDR_W/8); + if (buff_tlast) + xfer_cnt_pkts <= xfer_cnt_pkts + 40'd1; + end + end + + // Accumulated transfer count updater + always @(posedge clk) begin + if (rst || !fc_enabled || fc_refresh) begin + // Reset + accum_bytes <= 64'd0; + accum_pkts <= 40'd0; + end else if (buff_tvalid && buff_tready) begin + // Count + accum_bytes <= accum_bytes + (CHDR_W/8); + if (buff_tlast) + accum_pkts <= accum_pkts + 40'd1; + end + end + + // Flow control trigger + // Why a shift-register here? + // 1. For edge detection + // 2. To allow the tools to re-time the wide comparators. + // We don't care about the latency here because stream + // status messages are asynchronous wrt the input. 
+ always @(posedge clk) begin + if (rst || !fc_enabled) begin + fc_due_shreg <= 4'h0; + end else begin + fc_due_shreg <= { + fc_due_shreg[2:0], + (accum_bytes >= fc_freq_bytes) || (accum_pkts >= fc_freq_pkts) + }; + end + end + wire fc_resp_due = fc_due_shreg[2] && !fc_due_shreg[3]; + + // --------------------------------------------------- + // Stream Command Handler + // --------------------------------------------------- + localparam [2:0] ST_IN_HDR = 3'd0; // The CHDR header of an input pkt + localparam [2:0] ST_IN_DATA = 3'd1; // The CHDR body (incl. mdata) of an input pkt + localparam [2:0] ST_STRC_W0 = 3'd2; // The first word of a stream command + localparam [2:0] ST_STRC_W1 = 3'd3; // The second word of a stream command + localparam [2:0] ST_STRC_EXEC = 3'd4; // A stream command is executing + localparam [2:0] ST_FLUSH = 3'd5; // Input is flushing + localparam [2:0] ST_DROP = 3'd6; // Current packet is being dropped + + reg [2:0] state = ST_IN_HDR; // State of the input state machine + reg pkt_too_long = 1'b0; // Error case. Packet is too long + reg is_first_data_pkt = 1'b1; // Is this the first data pkt after fc_enabled = 1? + reg is_first_strc_pkt = 1'b1; // Is this the strm cmd data pkt after fc_enabled = 1? 
+ reg [15:0] exp_data_seq_num = 16'd0; // Expected sequence number for the next data pkt + reg [15:0] exp_strc_seq_num = 16'd0; // Expected sequence number for the next stream cmd pkt + reg [15:0] strc_dst_epid = 16'd0; // EPID in CHDR header of STRC packet + + reg [FLUSH_TIMEOUT_W-1:0] flush_counter = {FLUSH_TIMEOUT_W{1'b0}}; + + // Shortcuts + wire is_data_pkt = + chdr_get_pkt_type(buff_tdata[63:0]) == CHDR_PKT_TYPE_DATA || + chdr_get_pkt_type(buff_tdata[63:0]) == CHDR_PKT_TYPE_DATA_TS; + wire is_strc_pkt = + chdr_get_pkt_type(buff_tdata[63:0]) == CHDR_PKT_TYPE_STRC; + + // Error Logic + wire data_seq_err_stb = (state == ST_IN_HDR) && is_data_pkt && !is_first_data_pkt && + (chdr_get_seq_num(buff_tdata[63:0]) != exp_data_seq_num); + wire strc_seq_err_stb = (state == ST_IN_HDR) && is_strc_pkt && !is_first_strc_pkt && + (chdr_get_seq_num(buff_tdata[63:0]) != exp_strc_seq_num); + wire seq_err_stb = (data_seq_err_stb || strc_seq_err_stb) && buff_tvalid && buff_tready; + + wire route_err_stb = buff_tvalid && buff_tready && (state == ST_IN_HDR) && + (chdr_get_dst_epid(buff_tdata[63:0]) != this_epid); + + // Break critical paths to response FIFO + reg [47:0] stream_err_info = 48'h0; + reg stream_err_stb = 1'b0; + reg [3:0] stream_err_status = CHDR_STRS_STATUS_OKAY; + + always @(posedge clk) begin + if (rst || (SIGNAL_ERRS == 0)) begin + stream_err_stb <= 1'b0; + end else begin + stream_err_stb <= seq_err_stb | route_err_stb | data_err_stb; + if (seq_err_stb) begin + stream_err_status <= CHDR_STRS_STATUS_SEQERR; + // The extended info has the packet type (to detect which stream + // had an error), the expected and actual sequence number. + stream_err_info <= {13'h0, chdr_get_pkt_type(buff_tdata[63:0]), + data_seq_err_stb ? exp_data_seq_num : exp_strc_seq_num, + chdr_get_seq_num(buff_tdata[63:0])}; + end else if (route_err_stb) begin + stream_err_status <= CHDR_STRS_STATUS_RTERR; + // The extended info has the expected and actual destination EPID. 
+ stream_err_info <= {16'd0, this_epid, chdr_get_dst_epid(buff_tdata[63:0])}; + end else begin + stream_err_status <= CHDR_STRS_STATUS_DATAERR; + // The extended info has the expected and actual destination EPID. + stream_err_info <= {16'd0, this_epid, chdr_get_dst_epid(buff_tdata[63:0])}; + end + end + end + + // Input State Machine + // - Pass data packets forward + // - Consume stream cmd packets + always @(posedge clk) begin + if (rst) begin + state <= ST_IN_HDR; + pkt_too_long <= 1'b0; + fc_enabled <= 1'b0; + end else begin + case (state) + ST_IN_HDR: begin + if (buff_tvalid && buff_tready) begin + if (!buff_tlast) begin + // Classify packet and... + if (is_strc_pkt) begin + // ...consume if it is a stream command or... + state <= ST_STRC_W0; + end else if (is_data_pkt) begin + // ...pass to output if it is a data packet... + state <= ST_IN_DATA; + end else begin + // ... otherwise drop. + state <= ST_DROP; + end + end + // Update other state vars + pkt_too_long <= 1'b0; + if (is_strc_pkt) begin + is_first_strc_pkt <= 1'b0; + strc_dst_epid <= chdr_get_dst_epid(buff_tdata[63:0]); + exp_strc_seq_num <= chdr_get_seq_num(buff_tdata[63:0]) + 16'd1; + end else if (is_data_pkt) begin + is_first_data_pkt <= 1'b0; + exp_data_seq_num <= chdr_get_seq_num(buff_tdata[63:0]) + 16'd1; + end + end + end + ST_IN_DATA: begin + // Pass the data packet forward + if (buff_tvalid && buff_tready && buff_tlast) + state <= ST_IN_HDR; + end + ST_STRC_W0: begin + if (buff_tvalid && buff_tready) begin + // Consume the first word of a stream command packet + if (CHDR_W > 64) begin + strc_num_bytes <= chdr128_strc_get_num_bytes(buff_tdata[127:0]); + strc_num_pkts <= chdr128_strc_get_num_pkts (buff_tdata[127:0]); + strc_op_data <= chdr128_strc_get_op_data (buff_tdata[127:0]); + strc_op_code <= chdr128_strc_get_op_code (buff_tdata[127:0]); + return_epid <= chdr128_strs_get_src_epid (buff_tdata[127:0]); + state <= ST_STRC_EXEC; + pkt_too_long <= ~buff_tlast; + end else begin + strc_num_pkts 
<= chdr64_strc_get_num_pkts(buff_tdata[63:0]); + strc_op_data <= chdr64_strc_get_op_data (buff_tdata[63:0]); + strc_op_code <= chdr64_strc_get_op_code (buff_tdata[63:0]); + return_epid <= chdr64_strs_get_src_epid(buff_tdata[63:0]); + state <= ST_STRC_W1; + end + end + end + ST_STRC_W1: begin + if (buff_tvalid && buff_tready) begin + // Consume the second word of a stream command packet + strc_num_bytes <= chdr64_strc_get_num_bytes(buff_tdata[63:0]); + state <= ST_STRC_EXEC; + pkt_too_long <= ~buff_tlast; + end + end + ST_STRC_EXEC: begin + case (strc_op_code) + CHDR_STRC_OPCODE_INIT: begin + // Configure FC but disable it temporarily + fc_freq_bytes <= strc_num_bytes; + fc_freq_pkts <= strc_num_pkts; + this_epid <= strc_dst_epid; + fc_enabled <= 1'b0; + // Flush the input + state <= ST_FLUSH; + flush_counter <= {FLUSH_TIMEOUT_W{1'b1}}; + end + CHDR_STRC_OPCODE_PING: begin + // Ping can complete in 1 cycle + state <= pkt_too_long ? ST_DROP : ST_IN_HDR; + end + CHDR_STRC_OPCODE_RESYNC: begin + // Resync can complete in 1 cycle + state <= pkt_too_long ? ST_DROP : ST_IN_HDR; + end + default: begin + state <= pkt_too_long ? ST_DROP : ST_IN_HDR; + end + endcase + end + ST_FLUSH: begin + // Drop until the next packet arrives + if (buff_tvalid && buff_tready) begin + flush_counter <= {FLUSH_TIMEOUT_W{1'b1}}; + end else begin + flush_counter <= flush_counter - 'd1; + if (flush_counter == {FLUSH_TIMEOUT_W{1'b0}}) begin + // Done flushing. Re-arm flow control and reset packet + // sequence check info. 
+ fc_enabled <= 1'b1; + is_first_data_pkt <= 1'b1; + is_first_strc_pkt <= 1'b1; + state <= ST_IN_HDR; + end + end + end + ST_DROP: begin + // Drop until the next packet arrives + if (buff_tvalid && buff_tready && buff_tlast) + state <= ST_IN_HDR; + end + default: begin + // We should never get here + state <= ST_IN_HDR; + end + endcase + end + end + + always @(*) begin + case (state) + ST_IN_HDR: + buff_tready = m_axis_data_tready || !is_data_pkt; + ST_IN_DATA: + buff_tready = m_axis_data_tready; + ST_STRC_W0: + buff_tready = 1'b1; + ST_STRC_W1: + buff_tready = 1'b1; + ST_FLUSH: + buff_tready = 1'b1; + ST_DROP: + buff_tready = 1'b1; + default: + buff_tready = 1'b0; + endcase + end + + // Logic to drive output port + assign m_axis_data_tdata = buff_tdata; + assign m_axis_data_tlast = buff_tlast; + assign m_axis_data_tvalid = buff_tvalid && + ((state == ST_IN_HDR && is_data_pkt) || state == ST_IN_DATA); + + // Logic to drive triggers + assign fc_ping = (state == ST_STRC_EXEC) && (strc_op_code == CHDR_STRC_OPCODE_PING); + assign fc_first_resp = (state == ST_FLUSH) && (flush_counter == {FLUSH_TIMEOUT_W{1'b0}}); + assign fc_override = (state == ST_STRC_EXEC) && (strc_op_code == CHDR_STRC_OPCODE_RESYNC); + always @(posedge clk) fc_override_del <= fc_override; + + wire [51:0] resp_o_tdata; + wire resp_o_tvalid; + reg [51:0] resp_i_tdata; + reg resp_i_tvalid = 1'b0; + + // Send a stream status packet for the following cases: + // - Immediately after initialization + // - If a response is explicitly requested (ping) + // - If a response is due i.e. we have exceeded the frequency + // - If FC is resynchronized via a stream cmd + // - If an error is detected in the stream + always @(posedge clk) begin + if (rst) begin + resp_i_tvalid <= 1'b0; + resp_i_tdata <= 52'h0; + end else begin + resp_i_tvalid <= fc_first_resp || fc_ping || fc_resp_due || fc_override_del || stream_err_stb; + resp_i_tdata <= stream_err_stb ? 
{stream_err_info, stream_err_status} : {48'h0, CHDR_STRS_STATUS_OKAY}; + end + end + + // --------------------------------------------------- + // Stream Status Responder + // --------------------------------------------------- + localparam [2:0] ST_STRS_IDLE = 3'd0; // Waiting for response to post + localparam [2:0] ST_STRS_HDR = 3'd1; // Sending response CHDR header + localparam [2:0] ST_STRS_W0 = 3'd2; // Sending first response word + localparam [2:0] ST_STRS_W1 = 3'd3; // Sending second response word + localparam [2:0] ST_STRS_W2 = 3'd4; // Sending third response word + localparam [2:0] ST_STRS_W3 = 3'd5; // Sending fourth response word + localparam [2:0] ST_STRS_DONE = 3'd6; // Consuming response + + reg [2:0] resp_state = ST_STRS_IDLE; // State of the responder + reg [15:0] resp_seq_num = 16'd0; // Current sequence number of response + + assign fc_refresh = (resp_state == ST_STRS_DONE); + + // A FIFO that holds up to 32 posted responses and status information + // NOTE: This is a lossy FIFO. If the downstream response port is clogged + // then we will drop responses. That should never happen in a normal operating + // scenario. 
+ axi_fifo #(.WIDTH(48 + 4), .SIZE(5)) resp_fifo_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata(resp_i_tdata), .i_tvalid(resp_i_tvalid), .i_tready(/* Lossy FIFO */), + .o_tdata(resp_o_tdata), .o_tvalid(resp_o_tvalid), .o_tready(resp_state == ST_STRS_DONE || !fc_enabled), + .space(), .occupied() + ); + + // Responder State Machine + // - Wait for response to appear in FIFO + // - Output a full packet (different # of xfers depending on CHDR_W) + always @(posedge clk) begin + if (rst || !fc_enabled) begin + resp_state <= ST_STRS_IDLE; + resp_seq_num <= 16'd0; + end else begin + case (resp_state) + ST_STRS_IDLE: begin + if (resp_o_tvalid) + resp_state <= ST_STRS_HDR; + end + ST_STRS_HDR: begin + if (m_axis_strs_tready) + resp_state <= ST_STRS_W0; + end + ST_STRS_W0: begin + if (m_axis_strs_tready) + if (CHDR_W < 256) + resp_state <= ST_STRS_W1; + else + resp_state <= ST_STRS_DONE; + end + ST_STRS_W1: begin + if (m_axis_strs_tready) + if (CHDR_W < 128) + resp_state <= ST_STRS_W2; + else + resp_state <= ST_STRS_DONE; + end + ST_STRS_W2: begin + if (m_axis_strs_tready) + resp_state <= ST_STRS_W3; + end + ST_STRS_W3: begin + if (m_axis_strs_tready) + resp_state <= ST_STRS_DONE; + end + ST_STRS_DONE: begin + resp_state <= ST_STRS_IDLE; + resp_seq_num <= resp_seq_num + 16'd1; + end + default: begin + // We should never get here + resp_state <= ST_STRS_IDLE; + end + endcase + end + end + + // Output data. 
Header and Payload + wire [63:0] strs_header = chdr_build_header( + /*VC*/ 6'd0, /*eob*/ 1'b0, /*eov*/ 1'b0, CHDR_PKT_TYPE_STRS, CHDR_NO_MDATA, + resp_seq_num, 16'd32+(CHDR_W/8), return_epid); + wire [255:0] strs_payload = chdr256_strs_build( + /*statusinfo*/ resp_o_tdata[51:4], buff_info, + xfer_cnt_bytes, xfer_cnt_pkts, + BUFF_SIZE_PKTS[23:0], BUFF_SIZE_BYTES[39:0], + resp_o_tdata[3:0], this_epid); + + // m_axis_strs_* signal values depend on CHDR_W + generate + if (CHDR_W == 64) begin + // Response spans 5 transfers (header + 4 words) + assign m_axis_strs_tlast = (resp_state == ST_STRS_W3); + always @(*) begin + case (resp_state) + ST_STRS_W0: + m_axis_strs_tdata = strs_payload[63:0]; + ST_STRS_W1: + m_axis_strs_tdata = strs_payload[127:64]; + ST_STRS_W2: + m_axis_strs_tdata = strs_payload[191:128]; + ST_STRS_W3: + m_axis_strs_tdata = strs_payload[255:192]; + default: + m_axis_strs_tdata = strs_header; + endcase + end + end else if (CHDR_W == 128) begin + // Response spans 3 transfers (header + 2 words) + assign m_axis_strs_tlast = (resp_state == ST_STRS_W1); + always @(*) begin + case (resp_state) + ST_STRS_W0: + m_axis_strs_tdata = strs_payload[127:0]; + ST_STRS_W1: + m_axis_strs_tdata = strs_payload[255:128]; + default: + m_axis_strs_tdata = {64'h0, strs_header}; + endcase + end + end else begin + // Response spans 2 transfers (header + word) + assign m_axis_strs_tlast = (resp_state == ST_STRS_W0); + always @(*) begin + case (resp_state) + ST_STRS_W0: + m_axis_strs_tdata[255:0] = strs_payload; + default: + m_axis_strs_tdata[255:0] = {192'h0, strs_header}; + endcase + if (CHDR_W > 256) begin + m_axis_strs_tdata[CHDR_W-1:256] = 'h0; + end + end + end + endgenerate + + assign m_axis_strs_tvalid = (resp_state != ST_STRS_IDLE) && (resp_state != ST_STRS_DONE); + +endmodule // chdr_stream_input diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_stream_output.v b/fpga/usrp3/lib/rfnoc/core/chdr_stream_output.v new file mode 100644 index 000000000..271c7fccc --- /dev/null +++ 
b/fpga/usrp3/lib/rfnoc/core/chdr_stream_output.v @@ -0,0 +1,557 @@
//
// Copyright 2018-2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_stream_output
// Description:
//   Implements the CHDR output port for a stream endpoint.
//   The module generates stream command packets to setup
//   a downstream endpoint module (chdr_stream_input). Once
//   a stream is setup, the CHDR data on the axis_data port
//   can be sent downstream with full flow control. Stream
//   status messages are received from the downstream node
//   to update flow control state. This module has an external
//   configuration bus to initiate stream creation.
//
// Parameters:
//   - CHDR_W: Width of the CHDR bus in bits
//   - MTU: Log2 of the maximum number of lines in a packet
//
// Signals:
//   - m_axis_chdr_* : Output CHDR stream (AXI-Stream)
//   - s_axis_data_* : Input CHDR Data stream (AXI-Stream) before flow control
//   - s_axis_strs_* : Input stream status (AXI-Stream)
//   - cfg_start     : Launches a configuration (stream init) operation. It is
//                     only sampled while the main FSM is in ST_PASS_DATA.
//   - cfg_pending   : Set when cfg_start is accepted, cleared when the
//                     operation completes or fails.
//   - cfg_failed    : Set when the response to the init command carried a
//                     status other than CHDR_STRS_STATUS_OKAY.
//   - cfg_*         : Remaining cfg inputs are the parameters of the init
//                     command. lossy_xport, dst_epid and the headroom values
//                     are latched on cfg_start; cfg_this_epid and the
//                     cfg_fc_freq_* values are used combinationally when the
//                     stream command payload is built.
//   - fc_enabled, capacity_* : Flow control status. The capacities are taken
//                     from the most recent stream status message.
//   - *_err_stb, *_err_cnt : Strobe/counter pairs for error statuses that are
//                     received while data is being passed.

module chdr_stream_output #(
  parameter CHDR_W = 256,
  parameter MTU    = 10
)(
  // Clock, reset and settings
  input  wire              clk,
  input  wire              rst,
  // CHDR out (AXI-Stream)
  output wire [CHDR_W-1:0] m_axis_chdr_tdata,
  output wire              m_axis_chdr_tlast,
  output wire              m_axis_chdr_tvalid,
  input  wire              m_axis_chdr_tready,
  // Data packets in (AXI-Stream)
  input  wire [CHDR_W-1:0] s_axis_data_tdata,
  input  wire              s_axis_data_tlast,
  input  wire              s_axis_data_tvalid,
  output wire              s_axis_data_tready,
  // Stream status in (AXI-Stream)
  input  wire [CHDR_W-1:0] s_axis_strs_tdata,
  input  wire              s_axis_strs_tlast,
  input  wire              s_axis_strs_tvalid,
  output wire              s_axis_strs_tready,
  // Configuration port
  input  wire              cfg_start,
  output reg               cfg_pending = 1'b0,
  output reg               cfg_failed = 1'b0,
  input  wire              cfg_lossy_xport,
  input  wire [15:0]       cfg_dst_epid,
  input  wire [15:0]       cfg_this_epid,
  input  wire [39:0]       cfg_fc_freq_bytes,
  input  wire [23:0]       cfg_fc_freq_pkts,
  input  wire [15:0]       cfg_fc_headroom_bytes,
  input  wire [7:0]        cfg_fc_headroom_pkts,
  // Flow control status
  output reg               fc_enabled = 1'b0,
  output reg  [39:0]       capacity_bytes = 40'd0,
  output reg  [23:0]       capacity_pkts = 24'd0,
  // Stream status
  output wire              seq_err_stb,
  output reg  [31:0]       seq_err_cnt = 32'd0,
  output wire              data_err_stb,
  output reg  [31:0]       data_err_cnt = 32'd0,
  output wire              route_err_stb,
  output reg  [31:0]       route_err_cnt = 32'd0
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_chdr_internal_utils.vh"

  localparam CHDR_W_LOG2 = $clog2(CHDR_W);

  // ---------------------------------------------------
  //  Output packet gate
  // ---------------------------------------------------
  // Everything this module emits (data packets and stream commands) is
  // muxed onto chdr_out_* and passes through this gate on its way to
  // m_axis_chdr_*.
  reg  [CHDR_W-1:0] chdr_out_tdata;
  reg               chdr_out_tlast, chdr_out_tvalid;
  wire              chdr_out_tready;

  axi_packet_gate #(
    .WIDTH(CHDR_W), .SIZE(MTU), .USE_AS_BUFF(0)
  ) chdr_pkt_gate_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata(chdr_out_tdata), .i_tlast(chdr_out_tlast), .i_terror(1'b0),
    .i_tvalid(chdr_out_tvalid), .i_tready(chdr_out_tready),
    .o_tdata(m_axis_chdr_tdata), .o_tlast(m_axis_chdr_tlast),
    .o_tvalid(m_axis_chdr_tvalid), .o_tready(m_axis_chdr_tready)
  );

  // ---------------------------------------------------
  //  Flow Control State
  // ---------------------------------------------------

  // send_cnt: Total transfer count at the sender (here)
  // recv_cnt: Total transfer count at the receiver
  // accum:    Transfer count since last FC resynchronization request
  // headroom: Total headroom to keep in the downstream buffer
  // adj_cap:  The adjusted capacity (after headroom) of the downstream buffer
  // strc_cnt: Saved count for the STRC packet (prevents mid-packet updates)
  reg [63:0] send_cnt_bytes = 64'd0;
  reg [39:0] send_cnt_pkts  = 40'd0;
  reg [63:0] recv_cnt_bytes = 64'd0;
  reg [39:0] recv_cnt_pkts  = 40'd0;
  reg [39:0] accum_bytes    = 40'd0;
  reg [23:0] accum_pkts     = 24'd0;
  reg [15:0] headroom_bytes = 16'd0;
  reg [ 7:0] headroom_pkts  = 8'd0;
  reg [39:0] adj_cap_bytes  = 40'd0;
  reg [23:0] adj_cap_pkts   = 24'd0;
  reg [63:0] strc_cnt_bytes = 64'd0;

  // Output transfer count
  // Counts every accepted transfer on chdr_out_* while flow control is on.
  always @(posedge clk) begin
    if (rst || !fc_enabled) begin
      send_cnt_bytes <= 64'd0;
      send_cnt_pkts  <= 40'd0;
    end else if (chdr_out_tvalid && chdr_out_tready) begin
      send_cnt_bytes <= send_cnt_bytes + (CHDR_W/8);
      if (chdr_out_tlast)
        send_cnt_pkts <= send_cnt_pkts + 40'd1;
    end
  end

  // Buffer occupied counts
  // TODO: Need better overflow handling
  wire signed [64:0] occupied_bytes =
    $signed({1'b0, send_cnt_bytes}) - $signed({1'b0, recv_cnt_bytes});
  wire signed [40:0] occupied_pkts =
    $signed({1'b0, send_cnt_pkts}) - $signed({1'b0, recv_cnt_pkts});

  // OK-to-Send shift register
  // - Why a shift-register here?
  //   To allow the tools to re-time the wide comparators.
  // - We don't care about the latency here because stream
  //   status messages are asynchronous wrt the data
  reg [3:0] ok_shreg = 4'b1111;   // OK to send? (shift register)
  always @(posedge clk) begin
    if (rst || !fc_enabled) begin
      ok_shreg <= 4'b1111;
    end else begin
      ok_shreg <= {ok_shreg[2:0], (
        (occupied_bytes[40:0] < $signed({1'b0, adj_cap_bytes})) &&
        (occupied_pkts [24:0] < $signed({1'b0, adj_cap_pkts }))
      )};
    end
  end
  wire ok_to_send = ok_shreg[3];

  // Accumulated transfer count updater for FC resync
  // Only active when the transport was configured as lossy. A resync is
  // requested once more than one full downstream buffer's worth of
  // bytes/packets has been sent since the last acknowledged resync.
  reg       lossy_xport = 1'b0;
  reg [3:0] fc_resync_req_shreg = 4'h0;
  wire      fc_resync_req, fc_resync_ack;

  always @(posedge clk) begin
    if (rst || !fc_enabled || !lossy_xport || fc_resync_ack) begin
      // Reset
      accum_bytes <= 40'd0;
      accum_pkts  <= 24'd0;
      fc_resync_req_shreg <= 4'b0000;
    end else begin
      if (chdr_out_tvalid && chdr_out_tready) begin
        // Count
        accum_bytes <= accum_bytes + (CHDR_W/8);
        if (chdr_out_tlast)
          accum_pkts <= accum_pkts + 24'd1;
      end
      // FC resync request
      fc_resync_req_shreg <= {fc_resync_req_shreg[2:0],
        (accum_bytes > capacity_bytes) || (accum_pkts > capacity_pkts)};
    end
  end
  assign fc_resync_req = fc_resync_req_shreg[3];

  // ---------------------------------------------------
  //  Stream Status Parser
  // ---------------------------------------------------

  // The 4-bit status of each parsed message is posted to this FIFO; the
  // main state machine consumes it from the msg_o_* side.
  wire [3:0] msg_i_tdata, msg_o_tdata;
  wire       msg_i_tvalid, msg_o_tvalid;
  wire       msg_i_tready, msg_o_tready;

  axi_fifo #(.WIDTH(4), .SIZE(1)) msg_fifo_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata(msg_i_tdata), .i_tvalid(msg_i_tvalid), .i_tready(msg_i_tready),
    .o_tdata(msg_o_tdata), .o_tvalid(msg_o_tvalid), .o_tready(msg_o_tready),
    .space(), .occupied()
  );

  localparam [2:0] ST_STRS_HDR   = 3'd0;  // Receiving the CHDR header of a stream status msg
  localparam [2:0] ST_STRS_W0    = 3'd1;  // Receiving the first word of a stream status msg
  localparam [2:0] ST_STRS_W1    = 3'd2;  // Receiving the second word of a stream status msg
  localparam [2:0] ST_STRS_W2    = 3'd3;  // Receiving the third word of a stream status msg
  localparam [2:0] ST_STRS_W3    = 3'd4;  // Receiving the fourth word of a stream status msg
  localparam [2:0] ST_STRS_LATCH = 3'd5;  // Atomically updating and posting the status msg
  localparam [2:0] ST_STRS_DROP  = 3'd6;  // Something went wrong dropping current packet

  reg [2:0]   strs_state = ST_STRS_HDR;
  reg         strs_too_long = 1'b0;
  reg [15:0]  cached_dst_epid = 16'd0;
  reg [255:0] cached_strs_msg;

  always @(posedge clk) begin
    if (rst) begin
      strs_state <= ST_STRS_HDR;
      strs_too_long <= 1'b0;
    end else begin
      case (strs_state)

        // ST_STRS_HDR
        // ------------------
        ST_STRS_HDR: begin
          if (s_axis_strs_tvalid) begin
            // Only accept stream status packets. Drop everything else
            if (chdr_get_pkt_type(s_axis_strs_tdata[63:0]) == CHDR_PKT_TYPE_STRS)
              strs_state <= ST_STRS_W0;
            else
              strs_state <= ST_STRS_DROP;
            strs_too_long <= 1'b0;
          end
        end

        // ST_STRS_W0
        // ------------------
        // - Cache the first word of the stream status
        // - For CHDR_W == 64, this is one of 4 words.
        // - For CHDR_W == 128, this is one of 2 words.
        // - For CHDR_W >= 256, this is the only word.
        // - A packet that ends early (tlast before the full 256 bits have
        //   been cached) is abandoned without being latched.
        ST_STRS_W0: begin
          if (s_axis_strs_tvalid) begin
            if (CHDR_W == 64) begin
              cached_strs_msg[63:0] <= s_axis_strs_tdata[63:0];
              strs_state <= !s_axis_strs_tlast ? ST_STRS_W1 : ST_STRS_HDR;
            end else if (CHDR_W == 128) begin
              cached_strs_msg[127:0] <= s_axis_strs_tdata[127:0];
              strs_state <= !s_axis_strs_tlast ? ST_STRS_W1 : ST_STRS_HDR;
            end else begin //CHDR_W >= 256
              cached_strs_msg[255:0] <= s_axis_strs_tdata[255:0];
              strs_state <= ST_STRS_LATCH;
              strs_too_long <= !s_axis_strs_tlast;
            end
          end
        end

        // ST_STRS_W1
        // ------------------
        // - Cache the second word of the stream status
        ST_STRS_W1: begin
          if (s_axis_strs_tvalid) begin
            if (CHDR_W == 64) begin
              cached_strs_msg[127:64] <= s_axis_strs_tdata[63:0];
              strs_state <= !s_axis_strs_tlast ? ST_STRS_W2 : ST_STRS_HDR;
            end else begin //CHDR_W >= 128
              cached_strs_msg[255:128] <= s_axis_strs_tdata[127:0];
              strs_state <= ST_STRS_LATCH;
              strs_too_long <= !s_axis_strs_tlast;
            end
          end
        end

        // ST_STRS_W2
        // ------------------
        // - Cache the third word of the stream status
        ST_STRS_W2: begin
          if (s_axis_strs_tvalid) begin
            cached_strs_msg[191:128] <= s_axis_strs_tdata[63:0];
            strs_state <= !s_axis_strs_tlast ? ST_STRS_W3 : ST_STRS_HDR;
          end
        end

        // ST_STRS_W3
        // ------------------
        // - Cache the fourth word of the stream status
        ST_STRS_W3: begin
          if (s_axis_strs_tvalid) begin
            cached_strs_msg[255:192] <= s_axis_strs_tdata[63:0];
            strs_state <= ST_STRS_LATCH;
            strs_too_long <= !s_axis_strs_tlast;
          end
        end

        // ST_STRS_LATCH
        // ------------------
        // - Act on the received stream status
        // - The headroom in bytes is rounded down to a whole number of
        //   CHDR words before it is subtracted from the capacity.
        ST_STRS_LATCH: begin
          capacity_bytes <= chdr256_strs_get_capacity_bytes(cached_strs_msg);
          capacity_pkts  <= chdr256_strs_get_capacity_pkts(cached_strs_msg);
          recv_cnt_bytes <= chdr256_strs_get_xfercnt_bytes(cached_strs_msg);
          recv_cnt_pkts  <= chdr256_strs_get_xfercnt_pkts(cached_strs_msg);
          adj_cap_bytes  <= chdr256_strs_get_capacity_bytes(cached_strs_msg) -
            {24'd0, headroom_bytes[15:(CHDR_W_LOG2-3)], {(CHDR_W_LOG2-3){1'b0}}};
          adj_cap_pkts   <= chdr256_strs_get_capacity_pkts(cached_strs_msg) -
            {16'd0, headroom_pkts};
          if (msg_i_tready) begin
            strs_state <= strs_too_long ? ST_STRS_DROP : ST_STRS_HDR;
          end
        end

        // ST_STRS_DROP
        // ------------------
        ST_STRS_DROP: begin
          if (s_axis_strs_tvalid && s_axis_strs_tlast)
            strs_state <= ST_STRS_HDR;
        end
        default: begin
          // We should never get here
          strs_state <= ST_STRS_HDR;
        end
      endcase
    end
  end

  // Input is stalled only while the cached message is being posted
  assign s_axis_strs_tready = (strs_state != ST_STRS_LATCH);

  // A status whose source EPID is not the endpoint we are configured to talk
  // to is reported as a command error instead of its own status.
  assign msg_i_tvalid = (strs_state == ST_STRS_LATCH);
  assign msg_i_tdata  = (chdr256_strs_get_src_epid(cached_strs_msg) != cached_dst_epid) ?
    CHDR_STRS_STATUS_CMDERR : chdr256_strs_get_status(cached_strs_msg);


  // ---------------------------------------------------
  //  Main State Machine
  // ---------------------------------------------------

  localparam [2:0] ST_PASS_DATA = 3'd0;  // Passing input axis_data out
  localparam [2:0] ST_STRC_HDR  = 3'd1;  // Sending CHDR header for stream cmd
  localparam [2:0] ST_STRC_W0   = 3'd2;  // Sending first word of stream cmd
  localparam [2:0] ST_STRC_W1   = 3'd3;  // Sending second word of stream cmd
  localparam [2:0] ST_STRC_WAIT = 3'd4;  // Waiting for response (stream status)
  localparam [2:0] ST_INIT_DLY  = 3'd5;  // Finishing command execution

  reg [2:0]  state = ST_PASS_DATA;
  reg        mid_pkt = 1'b0;
  reg [15:0] data_seq_num = 16'd0;
  reg [15:0] strc_seq_num = 16'd0;
  reg [2:0]  cfg_delay = 3'd0;

  always @(posedge clk) begin
    if (rst) begin
      state <= ST_PASS_DATA;
      mid_pkt <= 1'b0;
      data_seq_num <= 16'd0;
      strc_seq_num <= 16'd0;
      cfg_pending <= 1'b0;
      cfg_failed <= 1'b0;
    end else begin
      case (state)

        // ST_PASS_DATA
        // ------------------
        // This is the default state where input data is passed to the
        // output port. Flow control is enforced in this state.
        // This state also serves as the launch state for a configuration
        // operation (using cfg_start)
        ST_PASS_DATA: begin
          // Update the mid_pkt flag and sequence number
          if (chdr_out_tvalid && chdr_out_tready) begin
            mid_pkt <= !chdr_out_tlast;
            if (chdr_out_tlast)
              data_seq_num <= data_seq_num + 16'd1;
          end
          // Launch a configuration operation
          if (cfg_start) begin
            // Latch cfg command
            cfg_pending <= 1'b1;
            cfg_failed <= 1'b0;
            // Disable flow control
            fc_enabled <= 1'b0;
            // Cache relevant data from the cfg cmd
            lossy_xport <= cfg_lossy_xport;
            cached_dst_epid <= cfg_dst_epid;
            headroom_bytes <= cfg_fc_headroom_bytes;
            headroom_pkts <= cfg_fc_headroom_pkts;
          end
          // Wait for current packet to transfer then begin the
          // configuration process or stream command
          if (cfg_start || cfg_pending || fc_resync_req) begin
            if (mid_pkt) begin
              if (chdr_out_tvalid && chdr_out_tready && chdr_out_tlast)
                state <= ST_STRC_HDR;
            end else begin
              if (!(chdr_out_tvalid && chdr_out_tready))
                state <= ST_STRC_HDR;
            end
          end
        end

        // ST_STRC_HDR
        // ------------------
        // Send the CHDR header for a stream command
        ST_STRC_HDR: begin
          if (chdr_out_tvalid && chdr_out_tready) begin
            state <= ST_STRC_W0;
            // Update seqnum for the next packet
            strc_seq_num <= strc_seq_num + 16'd1;
          end
          // Update byte count for stream command
          strc_cnt_bytes <= send_cnt_bytes;
        end

        // ST_STRC_W0
        // ------------------
        // Send the first line of a stream command
        ST_STRC_W0: begin
          if (chdr_out_tvalid && chdr_out_tready)
            if (CHDR_W < 128)
              state <= ST_STRC_W1;
            else
              // For CHDR_W >= 128 this is the last line of the command, so
              // make the same decision that ST_STRC_W1 makes for CHDR_W == 64:
              // a resync goes straight back to passing data (its status reply
              // is consumed in ST_PASS_DATA), only an init waits for a reply.
              // Going to ST_STRC_WAIT after a resync would treat the reply as
              // an init response and zero the sequence numbers mid-stream.
              state <= fc_resync_req ? ST_PASS_DATA : ST_STRC_WAIT;
        end

        // ST_STRC_W1
        // ------------------
        // Send the second line of a stream command
        ST_STRC_W1: begin
          if (chdr_out_tvalid && chdr_out_tready)
            state <= fc_resync_req ? ST_PASS_DATA : ST_STRC_WAIT;
        end

        // ST_STRC_WAIT
        // ------------------
        // Done sending stream command. Wait for a response
        ST_STRC_WAIT: begin
          // Wait for a new response to arrive
          if (msg_o_tvalid) begin
            if (msg_o_tdata == CHDR_STRS_STATUS_OKAY) begin
              state <= ST_INIT_DLY;
              cfg_delay <= 3'd4;
              fc_enabled <= 1'b1;
              data_seq_num <= 16'd0;
              strc_seq_num <= 16'd0;
            end else begin
              state <= ST_PASS_DATA;
              cfg_failed <= 1'b1;
              cfg_pending <= 1'b0;
            end
          end
        end

        // ST_INIT_DLY
        // ------------------
        // Delay matching state for ok_shreg
        ST_INIT_DLY: begin
          if (cfg_delay == 3'd0) begin
            state <= ST_PASS_DATA;
            cfg_pending <= 1'b0;
          end else begin
            cfg_delay <= cfg_delay - 3'd1;
          end
        end

        // We should never get here
        default: begin
          state <= ST_PASS_DATA;
        end
      endcase
    end
  end

  // Header for output CHDR data
  // The destination EPID and sequence number of the incoming header are
  // overwritten; all other header bits are passed through.
  wire [CHDR_W-1:0] data_header;
  assign data_header[63:0] = chdr_set_seq_num(
    chdr_set_dst_epid(s_axis_data_tdata[63:0], cached_dst_epid),
    data_seq_num);
  generate if (CHDR_W > 64)
    assign data_header[CHDR_W-1:64] = s_axis_data_tdata[CHDR_W-1:64];
  endgenerate

  // Header for stream command
  wire [CHDR_W-1:0] strc_header;
  assign strc_header[63:0] = chdr_build_header(
    /*VC*/ 6'd0, /*eob*/ 1'b0, /*eov*/ 1'b0, CHDR_PKT_TYPE_STRC, CHDR_NO_MDATA,
    strc_seq_num, 16'd16+(CHDR_W/8), cached_dst_epid);
  generate if (CHDR_W > 64)
    assign strc_header[CHDR_W-1:64] = {(CHDR_W-64){1'b0}};
  endgenerate

  // Payload for stream command
  // A pending resync request selects the RESYNC payload, otherwise INIT.
  wire [127:0] strc_init_payload = chdr128_strc_build(
    {24'h0, cfg_fc_freq_bytes}, {16'h0, cfg_fc_freq_pkts},
    /*op_data*/ 4'h0, CHDR_STRC_OPCODE_INIT, cfg_this_epid);
  wire [127:0] strc_resync_payload = chdr128_strc_build(
    strc_cnt_bytes, send_cnt_pkts,
    /*op_data*/ 4'h0, CHDR_STRC_OPCODE_RESYNC, cfg_this_epid);
  wire [127:0] strc_payload = fc_resync_req ? strc_resync_payload : strc_init_payload;

  // Output mux: selects what is driven into the packet gate in each state
  always @(*) begin
    case (state)
      ST_PASS_DATA: begin
        chdr_out_tdata  = mid_pkt ? s_axis_data_tdata : data_header;
        chdr_out_tlast  = s_axis_data_tlast;
        chdr_out_tvalid = s_axis_data_tvalid && ok_to_send;
      end
      ST_STRC_HDR: begin
        chdr_out_tdata  = strc_header;
        chdr_out_tlast  = 1'b0;
        chdr_out_tvalid = ok_to_send;
      end
      ST_STRC_W0: begin
        chdr_out_tdata  = strc_payload;
        chdr_out_tlast  = (CHDR_W < 128) ? 1'b0 : 1'b1;
        chdr_out_tvalid = ok_to_send;
      end
      ST_STRC_W1: begin
        // We will enter this state only if CHDR_W = 64
        chdr_out_tdata  = strc_payload[127:64];
        chdr_out_tlast  = 1'b1;
        chdr_out_tvalid = ok_to_send;
      end
      default: begin
        chdr_out_tdata  = {CHDR_W{1'b0}};
        chdr_out_tlast  = 1'b0;
        chdr_out_tvalid = 1'b0;
      end
    endcase
  end
  assign s_axis_data_tready = (state == ST_PASS_DATA) && chdr_out_tready && ok_to_send;

  // Consume all messages when passing data forward. The flow control state is automatically
  // updated outside the message FIFO. When a stream command is issued, we wait for the
  // "wait" state to consume responses.
  assign msg_o_tready = msg_o_tvalid && (state == ST_PASS_DATA || state == ST_STRC_WAIT);

  // Acknowledge a flow control resync command
  // The ack fires when the last line of the stream command is accepted. That
  // line is sent in ST_STRC_W1 when CHDR_W == 64 and in ST_STRC_W0 when
  // CHDR_W >= 128; chdr_out_tlast is only high in ST_STRC_W0 for the latter,
  // so both cases are covered without acknowledging early on a 64-bit bus.
  assign fc_resync_ack = fc_resync_req &&
    (state == ST_STRC_W0 || state == ST_STRC_W1) &&
    chdr_out_tvalid && chdr_out_tready && chdr_out_tlast;

  // ---------------------------------------------------
  //  Stream Status Reporting
  // ---------------------------------------------------

  // Only statuses consumed while passing data count as runtime errors;
  // responses consumed in ST_STRC_WAIT are handled by the main state machine.
  wire runtime_err_stb = msg_o_tvalid && msg_o_tready && (state == ST_PASS_DATA);
  assign seq_err_stb   = runtime_err_stb && (msg_o_tdata == CHDR_STRS_STATUS_SEQERR);
  assign data_err_stb  = runtime_err_stb && (msg_o_tdata == CHDR_STRS_STATUS_DATAERR);
  assign route_err_stb = runtime_err_stb && (msg_o_tdata == CHDR_STRS_STATUS_RTERR);

  // Error counters; cleared whenever flow control is disabled
  always @(posedge clk) begin
    if (rst || !fc_enabled) begin
      seq_err_cnt   <= 32'd0;
      data_err_cnt  <= 32'd0;
      route_err_cnt <= 32'd0;
    end else begin
      if (seq_err_stb)
        seq_err_cnt <= seq_err_cnt + 32'd1;
      if (data_err_stb)
        data_err_cnt <=
data_err_cnt + 32'd1; + if (route_err_stb) + route_err_cnt <= route_err_cnt + 32'd1; + end + end + +endmodule // chdr_stream_output diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_ctrl.v b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_ctrl.v new file mode 100644 index 000000000..1f9dba2eb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_ctrl.v @@ -0,0 +1,319 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_to_axis_ctrl
// Description:
//  Converts from CHDR to AXIS-Control and vice versa.
//  This module has to handle remote control transactions
//  correctly. The CHDR frame has/needs the DstEPID, DstPort
//  SrcEPID and SrcPort and the AXIS-Ctrl frame has/needs
//  the DstPort, SrcPort, RemDstEPID and RemDstPort.
//
// Parameters:
//   - CHDR_W: Width of the CHDR bus in bits
//   - THIS_PORTID: The port number of the control xbar
//                  that this module is connected to.
//
// Signals:
//   - rfnoc_chdr_clk/rst : Clock and reset for the CHDR-side logic
//   - rfnoc_ctrl_clk/rst : Clock and reset for the AXIS-Ctrl ports
//   - this_epid          : Placed in the SrcEPID field of outgoing
//                          CHDR control payloads
//   - s_rfnoc_chdr_* : Input CHDR stream (AXI-Stream)
//   - m_rfnoc_chdr_* : Output CHDR stream (AXI-Stream)
//   - s_rfnoc_ctrl_* : Input control stream (AXI-Stream)
//   - m_rfnoc_ctrl_* : Output control stream (AXI-Stream)

module chdr_to_axis_ctrl #(
  parameter       CHDR_W      = 256,
  parameter [9:0] THIS_PORTID = 10'd0
)(
  // CHDR Bus (master and slave)
  input  wire              rfnoc_chdr_clk,
  input  wire              rfnoc_chdr_rst,
  input  wire [15:0]       this_epid,
  input  wire [CHDR_W-1:0] s_rfnoc_chdr_tdata,
  input  wire              s_rfnoc_chdr_tlast,
  input  wire              s_rfnoc_chdr_tvalid,
  output wire              s_rfnoc_chdr_tready,
  output wire [CHDR_W-1:0] m_rfnoc_chdr_tdata,
  output wire              m_rfnoc_chdr_tlast,
  output wire              m_rfnoc_chdr_tvalid,
  input  wire              m_rfnoc_chdr_tready,
  // AXIS-Control Bus (master and slave)
  input  wire              rfnoc_ctrl_clk,
  input  wire              rfnoc_ctrl_rst,
  input  wire [31:0]       s_rfnoc_ctrl_tdata,
  input  wire              s_rfnoc_ctrl_tlast,
  input  wire              s_rfnoc_ctrl_tvalid,
  output wire              s_rfnoc_ctrl_tready,
  output wire [31:0]       m_rfnoc_ctrl_tdata,
  output wire              m_rfnoc_ctrl_tlast,
  output wire              m_rfnoc_ctrl_tvalid,
  input  wire              m_rfnoc_ctrl_tready
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_axis_ctrl_utils.vh"

  // State encoding shared by both direction state machines
  // (the Ctrl => CHDR path never uses ST_CHDR_MDATA)
  localparam [1:0] ST_CHDR_HDR   = 2'd0;   // Processing the CHDR header
  localparam [1:0] ST_CHDR_MDATA = 2'd1;   // Processing the CHDR metadata
  localparam [1:0] ST_CTRL_HDR   = 2'd2;   // Processing the CHDR control header
  localparam [1:0] ST_CTRL_BODY  = 2'd3;   // Processing the CHDR control body

  // ---------------------------------------------------
  //  Input/output register slices
  // ---------------------------------------------------
  // - ch2ct: CHDR to Ctrl
  // - ct2ch: Ctrl to CHDR

  wire [CHDR_W-1:0] ch2ct_tdata,  ct2ch_tdata;
  wire              ch2ct_tlast,  ct2ch_tlast;
  wire              ch2ct_tvalid, ct2ch_tvalid;
  wire              ch2ct_tready, ct2ch_tready;

  axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(1)) ch2ct_reg_i (
    .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst), .clear(1'b0),
    .i_tdata({s_rfnoc_chdr_tlast, s_rfnoc_chdr_tdata}),
    .i_tvalid(s_rfnoc_chdr_tvalid), .i_tready(s_rfnoc_chdr_tready),
    .o_tdata({ch2ct_tlast, ch2ct_tdata}),
    .o_tvalid(ch2ct_tvalid), .o_tready(ch2ct_tready),
    .space(), .occupied()
  );

  axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(1)) ct2ch_reg_i (
    .clk(rfnoc_chdr_clk), .reset(rfnoc_chdr_rst), .clear(1'b0),
    .i_tdata({ct2ch_tlast, ct2ch_tdata}),
    .i_tvalid(ct2ch_tvalid), .i_tready(ct2ch_tready),
    .o_tdata({m_rfnoc_chdr_tlast, m_rfnoc_chdr_tdata}),
    .o_tvalid(m_rfnoc_chdr_tvalid), .o_tready(m_rfnoc_chdr_tready),
    .space(), .occupied()
  );

  // ---------------------------------------------------
  //  CH2CT: CHDR => Ctrl path
  // ---------------------------------------------------
  // When converting CHDR => Ctrl we know we are dealing with
  // a remote control transaction so we need to perform
  // the following transformations to ensure that the packet
  // has all the info to route downstream and has enough info
  // to return to the master (of the transaction).
  // - Use the CHDR DstPort as the Ctrl DstPort (forward the master's request)
  // - Use THIS_PORTID as the Ctrl SrcPort (for the return path back here)
  // - Use the CHDR SrcEPID as the Ctrl RemDstEPID (return path for CHDR packet)
  // - Use the CHDR SrcPort as the Ctrl RemDstPort (return path in the downstream EP)
  // - Ignore the CHDR DstEPID because packet is already here

  reg [1:0] ch2ct_state  = ST_CHDR_HDR;
  // Number of metadata words still to skip, minus one (not cleared by reset;
  // it is reloaded from every CHDR header).
  reg [4:0] ch2ct_nmdata = CHDR_NO_MDATA;

  always @(posedge rfnoc_chdr_clk) begin
    if (rfnoc_chdr_rst) begin
      ch2ct_state <= ST_CHDR_HDR;
    end else if (ch2ct_tvalid && ch2ct_tready) begin
      case (ch2ct_state)
        ST_CHDR_HDR: begin
          ch2ct_nmdata <= chdr_get_num_mdata(ch2ct_tdata[63:0]) - 5'd1;
          if (!ch2ct_tlast)
            ch2ct_state <= (chdr_get_num_mdata(ch2ct_tdata[63:0]) == 5'd0) ?
              ST_CTRL_HDR : ST_CHDR_MDATA;
          else
            ch2ct_state <= ST_CHDR_HDR;   // Premature termination
        end
        ST_CHDR_MDATA: begin
          ch2ct_nmdata <= ch2ct_nmdata - 5'd1;
          if (!ch2ct_tlast)
            ch2ct_state <= (ch2ct_nmdata == CHDR_NO_MDATA) ? ST_CTRL_HDR : ST_CHDR_MDATA;
          else
            ch2ct_state <= ST_CHDR_HDR;   // Premature termination
        end
        ST_CTRL_HDR: begin
          ch2ct_state <= ch2ct_tlast ? ST_CHDR_HDR : ST_CTRL_BODY;
        end
        ST_CTRL_BODY: begin
          if (ch2ct_tlast)
            ch2ct_state <= ST_CHDR_HDR;
        end
        default: begin
          // We should never get here
          ch2ct_state <= ST_CHDR_HDR;
        end
      endcase
    end
  end

  // Per-32-bit-word keep mask for the incoming CHDR stream
  wire [(CHDR_W/32)-1:0] ch2ct_tkeep;
  chdr_compute_tkeep #(.CHDR_W(CHDR_W), .ITEM_W(32)) chdr_tkeep_gen_i (
    .clk(rfnoc_chdr_clk), .rst(rfnoc_chdr_rst),
    .axis_tdata(ch2ct_tdata), .axis_tlast(ch2ct_tlast),
    .axis_tvalid(ch2ct_tvalid), .axis_tready(ch2ct_tready),
    .axis_tkeep(ch2ct_tkeep)
  );

  // Create the first two lines of the Ctrl word (wide)
  // using data from CHDR packet
  wire [CHDR_W-1:0] ch2ct_new_ctrl_hdr;
  assign ch2ct_new_ctrl_hdr[63:0] = {
    axis_ctrl_build_hdr_hi(
      axis_ctrl_get_src_port(ch2ct_tdata[31:0]),
      axis_ctrl_get_rem_dst_epid(ch2ct_tdata[63:32])
    ),
    axis_ctrl_build_hdr_lo(
      axis_ctrl_get_is_ack  (ch2ct_tdata[31:0]),
      axis_ctrl_get_has_time(ch2ct_tdata[31:0]),
      axis_ctrl_get_seq_num (ch2ct_tdata[31:0]),
      axis_ctrl_get_num_data(ch2ct_tdata[31:0]),
      THIS_PORTID,
      axis_ctrl_get_dst_port(ch2ct_tdata[31:0])
    )
  };
  generate if (CHDR_W > 64) begin
    assign ch2ct_new_ctrl_hdr[CHDR_W-1:64] = ch2ct_tdata[CHDR_W-1:64];
  end endgenerate

  // Only the control header line is rewritten; body lines pass through
  wire [CHDR_W-1:0] ch2ct_wctrl_tdata =
    (ch2ct_state == ST_CTRL_HDR) ? ch2ct_new_ctrl_hdr : ch2ct_tdata;

  // CHDR_W => 32-bit conversion. The CHDR header and metadata words are
  // discarded by masking tvalid outside of the two control states.
  axis_width_conv #(
    .WORD_W(32), .IN_WORDS(CHDR_W/32), .OUT_WORDS(1),
    .SYNC_CLKS(0), .PIPELINE("OUT")
  ) ctrl_downsizer_i (
    .s_axis_aclk(rfnoc_chdr_clk), .s_axis_rst(rfnoc_chdr_rst),
    .s_axis_tdata(ch2ct_wctrl_tdata),
    .s_axis_tkeep(ch2ct_tkeep),
    .s_axis_tlast(ch2ct_tlast),
    .s_axis_tvalid(ch2ct_tvalid && (ch2ct_state == ST_CTRL_HDR || ch2ct_state == ST_CTRL_BODY)),
    .s_axis_tready(ch2ct_tready),
    .m_axis_aclk(rfnoc_ctrl_clk), .m_axis_rst(rfnoc_ctrl_rst),
    .m_axis_tdata(m_rfnoc_ctrl_tdata),
    .m_axis_tkeep(/* Unused: OUT_WORDS=1 */),
    .m_axis_tlast(m_rfnoc_ctrl_tlast),
    .m_axis_tvalid(m_rfnoc_ctrl_tvalid),
    .m_axis_tready(m_rfnoc_ctrl_tready)
  );

  // ---------------------------------------------------
  //  CT2CH: Ctrl => CHDR path
  // ---------------------------------------------------
  // When converting Ctrl => CHDR we know we are dealing with
  // a remote control transaction so we need to perform
  // the following transformations to ensure that the packet
  // has all the info to route downstream and has enough info
  // to return to the initiator of the transaction.
  // - Use the Ctrl RemDstEPID as the CHDR DstEPID (forward the master's request)
  // - Use the Ctrl RemDstPort as the CHDR DstPort (forward the master's request)
  // - Use the this_epid as CHDR SrcEPID (return path for the CHDR packet)
  // - Use the Ctrl SrcPort as the CHDR SrcPort (return path to the master)
  // - Ignore the Ctrl DstPort because the packet has already been routed

  wire [CHDR_W-1:0] ct2ch_wctrl_tdata;
  wire              ct2ch_wctrl_tlast, ct2ch_wctrl_tvalid, ct2ch_wctrl_tready;

  axis_width_conv #(
    .WORD_W(32), .IN_WORDS(1), .OUT_WORDS(CHDR_W/32),
    .SYNC_CLKS(0), .PIPELINE("IN")
  ) ctrl_upsizer_i (
    .s_axis_aclk(rfnoc_ctrl_clk), .s_axis_rst(rfnoc_ctrl_rst),
    .s_axis_tdata(s_rfnoc_ctrl_tdata),
    .s_axis_tkeep(/* Unused: IN_WORDS=1 */),
    .s_axis_tlast(s_rfnoc_ctrl_tlast),
    .s_axis_tvalid(s_rfnoc_ctrl_tvalid),
    .s_axis_tready(s_rfnoc_ctrl_tready),
    .m_axis_aclk(rfnoc_chdr_clk), .m_axis_rst(rfnoc_chdr_rst),
    .m_axis_tdata(ct2ch_wctrl_tdata),
    .m_axis_tkeep(/* Unused: We are updating the CHDR length */),
    .m_axis_tlast(ct2ch_wctrl_tlast),
    .m_axis_tvalid(ct2ch_wctrl_tvalid),
    .m_axis_tready(ct2ch_wctrl_tready)
  );

  reg [1:0]  ct2ch_state  = ST_CHDR_HDR;
  reg [15:0] ct2ch_seqnum = 16'd0;        // CHDR sequence number, one per packet

  always @(posedge rfnoc_chdr_clk) begin
    if (rfnoc_chdr_rst) begin
      ct2ch_state <= ST_CHDR_HDR;
      ct2ch_seqnum <= 16'd0;
    end else if (ct2ch_tvalid && ct2ch_tready) begin
      case (ct2ch_state)
        ST_CHDR_HDR: begin
          // The generated CHDR header is never the last word of the packet
          // (ct2ch_tlast is forced low in this state), so always move on to
          // the held control header line.
          ct2ch_state <= ST_CTRL_HDR;
        end
        ST_CTRL_HDR: begin
          if (ct2ch_tlast)
            ct2ch_state <= ST_CHDR_HDR;
          else
            ct2ch_state <= ST_CTRL_BODY;
        end
        ST_CTRL_BODY: begin
          if (ct2ch_tlast)
            ct2ch_state <= ST_CHDR_HDR;
        end
        default: begin
          // We should never get here
          ct2ch_state <= ST_CHDR_HDR;
        end
      endcase
      if (ct2ch_tlast)
        ct2ch_seqnum <= ct2ch_seqnum + 16'd1;
    end
  end

  // Hold the first line to generate info for the outgoing CHDR header
  assign ct2ch_wctrl_tready = (ct2ch_state == ST_CTRL_HDR || ct2ch_state == ST_CTRL_BODY) ? ct2ch_tready : 1'b0;

  // Length of the control payload in 32-bit words
  wire [7:0] ct2ch_32bit_lines = 8'd3 +                               // Header + OpWord
    (axis_ctrl_get_has_time(ct2ch_wctrl_tdata[31:0]) ? 8'd2 : 8'd0) + // Timestamp
    ({4'h0, axis_ctrl_get_num_data(ct2ch_wctrl_tdata[31:0])});        // Data words

  // Length of the whole CHDR packet in CHDR_W-wide words
  wire [15:0] ct2ch_chdr_lines = 16'd1 +                // CHDR header
    ct2ch_32bit_lines[7:$clog2(CHDR_W/32)] +            // Convert 32-bit lines to CHDR_W
    (|ct2ch_32bit_lines[$clog2(CHDR_W/32)-1:0]);        // Residue

  // Lower 64 bits of the outgoing CHDR word, selected by state
  reg [63:0] ct2ch_chdr_tdata;
  always @(*) begin
    case (ct2ch_state)
      ST_CHDR_HDR: begin
        ct2ch_chdr_tdata = chdr_build_header(
          6'd0,                    /* VC */
          1'b0, 1'b0,              /* eob, eov */
          CHDR_PKT_TYPE_CTRL,
          CHDR_NO_MDATA,
          ct2ch_seqnum,
          (ct2ch_chdr_lines << $clog2(CHDR_W/8)), /* length in bytes */
          axis_ctrl_get_rem_dst_epid(ct2ch_wctrl_tdata[63:32])
        );
      end
      ST_CTRL_HDR: begin
        ct2ch_chdr_tdata = {
          axis_ctrl_build_hdr_hi(
            10'd0,                 /* Unused in CHDR Control payload */
            this_epid              /* This is the SrcEPID */
          ),
          axis_ctrl_build_hdr_lo(
            axis_ctrl_get_is_ack  (ct2ch_wctrl_tdata[31:0]),
            axis_ctrl_get_has_time(ct2ch_wctrl_tdata[31:0]),
            axis_ctrl_get_seq_num (ct2ch_wctrl_tdata[31:0]),
            axis_ctrl_get_num_data(ct2ch_wctrl_tdata[31:0]),
            axis_ctrl_get_src_port(ct2ch_wctrl_tdata[31:0]),
            axis_ctrl_get_rem_dst_port(ct2ch_wctrl_tdata[63:32])
          )
        };
      end
      default: begin
        ct2ch_chdr_tdata = ct2ch_wctrl_tdata[63:0];
      end
    endcase
  end

  // Output signals
  assign ct2ch_tdata[63:0] = ct2ch_chdr_tdata;
  // The CHDR header word is generated while the first control line is held
  // at the upsizer output. When the whole control packet fits in a single
  // CHDR_W word, that held line already has tlast set. Passing it through
  // would terminate the CHDR packet on its header and the state machine
  // would emit header-only packets indefinitely, so tlast is suppressed
  // while the header is being sent.
  assign ct2ch_tlast       = ct2ch_wctrl_tlast && (ct2ch_state != ST_CHDR_HDR);
  assign ct2ch_tvalid      = ct2ch_wctrl_tvalid;
  generate if (CHDR_W > 64) begin
    assign ct2ch_tdata[CHDR_W-1:64] = ct2ch_wctrl_tdata[CHDR_W-1:64];
  end endgenerate

endmodule // chdr_to_axis_ctrl
diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data.v b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data.v new file mode 100644 index 000000000..a00a9952c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data.v @@ -0,0 +1,422 @@ +// +// Copyright 2019 Ettus 
Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_to_axis_data
//
// Description:
//
//   A deframer module for CHDR data packets. It accepts an input CHDR stream
//   and produces an output data stream that includes the payload of the
//   packet, as well as timestamp and packet flags presented as sideband
//   information.
//
//   This module also performs an optional clock crossing and data width
//   conversion from CHDR_W to a user requested width for the payload data bus.
//
//   Metadata words are discarded, and so are non-data packets. A packet that
//   ends on its header (or, for CHDR_W == 64, on its timestamp word) has no
//   payload and is dropped without producing any output.
//
// Parameters:
//   - CHDR_W         : Width of the input CHDR bus in bits
//   - ITEM_W         : Width of the output item bus in bits
//   - NIPC           : The number of output items delivered per cycle
//   - SYNC_CLKS      : Are the CHDR and data clocks synchronous to each other?
//   - INFO_FIFO_SIZE : Log2 of the FIFO size for the packet info data path
//   - PYLD_FIFO_SIZE : Log2 of the FIFO size for the payload data path
//
// Signals:
//   - s_axis_chdr_*     : Input CHDR stream (AXI-Stream)
//   - m_axis_*          : Output payload data stream (AXI-Stream)
//   - m_axis_ttimestamp,
//     m_axis_thas_time,
//     m_axis_tlength,
//     m_axis_teob,
//     m_axis_teov       : Per-packet sideband info output with the payload
//   - flush_*           : Signals for flush control and status
//

module chdr_to_axis_data #(
  parameter CHDR_W         = 256,
  parameter ITEM_W         = 32,
  parameter NIPC           = 2,
  parameter SYNC_CLKS      = 0,
  parameter INFO_FIFO_SIZE = 5,
  parameter PYLD_FIFO_SIZE = 5
)(
  // Clock, reset and settings
  input  wire                     axis_chdr_clk,
  input  wire                     axis_chdr_rst,
  input  wire                     axis_data_clk,
  input  wire                     axis_data_rst,
  // CHDR in (AXI-Stream)
  input  wire [CHDR_W-1:0]        s_axis_chdr_tdata,
  input  wire                     s_axis_chdr_tlast,
  input  wire                     s_axis_chdr_tvalid,
  output wire                     s_axis_chdr_tready,
  // Payload data stream out (AXI-Stream)
  output wire [(ITEM_W*NIPC)-1:0] m_axis_tdata,
  output wire [NIPC-1:0]          m_axis_tkeep,
  output wire                     m_axis_tlast,
  output wire                     m_axis_tvalid,
  input  wire                     m_axis_tready,
  // Payload sideband information
  output wire [63:0]              m_axis_ttimestamp,
  output wire                     m_axis_thas_time,
  output wire [15:0]              m_axis_tlength,
  output wire                     m_axis_teob,
  output wire                     m_axis_teov,
  // Flush signals
  input  wire                     flush_en,
  input  wire [31:0]              flush_timeout,
  output wire                     flush_active,
  output wire                     flush_done
);

  // ---------------------------------------------------
  //  RFNoC Includes
  // ---------------------------------------------------
  `include "rfnoc_chdr_utils.vh"
  `include "rfnoc_axis_ctrl_utils.vh"

  // ---------------------------------------------------
  //  Pipeline
  // ---------------------------------------------------
  localparam CHDR_KEEP_W = CHDR_W/ITEM_W;   // Items per CHDR word

  wire [CHDR_W-1:0]      in_chdr_tdata;
  wire [CHDR_KEEP_W-1:0] in_chdr_tkeep;
  wire                   in_chdr_tlast, in_chdr_tvalid;
  reg                    in_chdr_tready;

  axi_fifo_flop2 #(.WIDTH(CHDR_W+1)) in_pipe_i (
    .clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(1'b0),
    .i_tdata({s_axis_chdr_tlast, s_axis_chdr_tdata}),
    .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready),
    .o_tdata({in_chdr_tlast, in_chdr_tdata}),
    .o_tvalid(in_chdr_tvalid), .o_tready(in_chdr_tready),
    .space(), .occupied()
  );

  chdr_compute_tkeep #(.CHDR_W(CHDR_W), .ITEM_W(ITEM_W)) tkeep_gen_i (
    .clk(axis_chdr_clk), .rst(axis_chdr_rst),
    .axis_tdata(in_chdr_tdata), .axis_tlast(in_chdr_tlast),
    .axis_tvalid(in_chdr_tvalid), .axis_tready(in_chdr_tready),
    .axis_tkeep(in_chdr_tkeep)
  );

  // ---------------------------------------------------
  //  Input State Machine
  // ---------------------------------------------------
  localparam INFO_W = 64+1+16+1+1;    // timestamp, has_time, length, eob, eov

  wire [CHDR_W-1:0]      in_pyld_tdata;
  wire [CHDR_KEEP_W-1:0] in_pyld_tkeep;
  wire                   in_pyld_tlast, in_pyld_tvalid, in_pyld_tready;

  reg  [INFO_W-1:0]      in_info_tdata;
  reg                    in_info_tvalid;
  wire                   in_info_tready;

  localparam [2:0] ST_HDR   = 3'd0;   // Processing the input CHDR header
  localparam [2:0] ST_TS    = 3'd1;   // Processing the input CHDR timestamp
  localparam [2:0] ST_MDATA = 3'd2;   // Processing the input CHDR metadata word
  localparam [2:0] ST_BODY  = 3'd3;   // Processing the input CHDR payload word
  localparam [2:0] ST_DROP  = 3'd4;   // Something went wrong... Dropping packet

  reg [2:0] state = ST_HDR;
  reg [4:0] mdata_pending = CHDR_NO_MDATA;

  // Header fields cached for the CHDR_W == 64 case, where the info word is
  // written one cycle later together with the timestamp
  reg [15:0] chdr_length_reg;
  reg        chdr_eob_reg, chdr_eov_reg;

  // Shortcuts: CHDR header
  wire [2:0] in_pkt_type = chdr_get_pkt_type(in_chdr_tdata[63:0]);
  wire [4:0] in_num_mdata = chdr_get_num_mdata(in_chdr_tdata[63:0]);

  always @(posedge axis_chdr_clk) begin
    if (axis_chdr_rst) begin
      state <= ST_HDR;
    end else if (in_chdr_tvalid & in_chdr_tready) begin
      case (state)

        // ST_HDR: CHDR Header
        // -------------------
        ST_HDR: begin
          // Always cache the number of metadata words
          mdata_pending <= in_num_mdata;
          // Figure out the next state
          if (!in_chdr_tlast) begin
            if (CHDR_W > 64) begin
              // When CHDR_W > 64, the timestamp is a part of the header word.
              // If this is a data packet (with/without a TS), we move on to the metadata/body
              // state otherwise we drop it. Non-data packets should never reach here.
              if (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin
                if (in_num_mdata != CHDR_NO_MDATA) begin
                  state <= ST_MDATA;
                end else begin
                  state <= ST_BODY;
                end
              end else begin
                state <= ST_DROP;
              end
            end else begin
              // When CHDR_W == 64, the timestamp comes after the header. Check if this is a data
              // packet with a TS to figure out the next state. If no TS, then check for metadata
              // to move to the next state. Drop any non-data packets.
              chdr_length_reg <= chdr_calc_payload_length(CHDR_W, in_chdr_tdata);
              chdr_eob_reg    <= chdr_get_eob(in_chdr_tdata);
              chdr_eov_reg    <= chdr_get_eov(in_chdr_tdata);
              if (in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin
                state <= ST_TS;
              end else if (in_pkt_type == CHDR_PKT_TYPE_DATA) begin
                if (in_num_mdata != CHDR_NO_MDATA) begin
                  state <= ST_MDATA;
                end else begin
                  state <= ST_BODY;
                end
              end else begin
                state <= ST_DROP;
              end
            end
          end else begin    // Premature termination
            // Packets must have at least one payload line
            state <= ST_HDR;
          end
        end

        // ST_TS: Timestamp (CHDR_W == 64 only)
        // ------------------------------------
        ST_TS: begin
          if (!in_chdr_tlast) begin
            if (mdata_pending != CHDR_NO_MDATA) begin
              state <= ST_MDATA;
            end else begin
              state <= ST_BODY;
            end
          end else begin    // Premature termination
            // Packets must have at least one payload line
            state <= ST_HDR;
          end
        end

        // ST_MDATA: Metadata word
        // -----------------------
        ST_MDATA: begin
          if (!in_chdr_tlast) begin
            // Count down metadata and stop at 1
            if (mdata_pending == 5'd1) begin
              state <= ST_BODY;
            end else begin
              mdata_pending <= mdata_pending - 5'd1;
            end
          end else begin    // Premature termination
            // Packets must have at least one payload line
            state <= ST_HDR;
          end
        end

        // ST_BODY: Payload word
        // ---------------------
        ST_BODY: begin
          if (in_chdr_tlast) begin
            state <= ST_HDR;
          end
        end

        // ST_DROP: Drop current packet
        // ----------------------------
        ST_DROP: begin
          if (in_chdr_tlast) begin
            state <= ST_HDR;
          end
        end

        default: begin
          // We should never get here
          state <= ST_HDR;
        end
      endcase
    end
  end

  // CHDR data goes to the payload stream only in the BODY state. Packets are
  // expected to have at least one payload word so the CHDR tlast can be used
  // as the payload tlast.
  assign in_pyld_tdata  = in_chdr_tdata;
  assign in_pyld_tkeep  = in_chdr_tkeep;
  assign in_pyld_tlast  = in_chdr_tlast;
  assign in_pyld_tvalid = in_chdr_tvalid && (state == ST_BODY);

  always @(*) begin
    // Packet timestamp and flags go into the info FIFO, but only if it's a
    // data packet since non-data packets will be discarded.
    //
    // The info word is also withheld when the word that would produce it is
    // the last word of the packet (premature termination). Such a packet has
    // no payload, and the output merge logic expects exactly one info word
    // per payload packet, so an unmatched info word would pair every later
    // payload packet with the wrong sideband information.
    // NOTE: A packet that terminates inside its metadata words has already
    // written its info word by then and is not covered by this guard.
    if (CHDR_W > 64) begin
      // When CHDR_W > 64, all info will be in the first word of the CHDR packet
      in_info_tdata = { in_chdr_tdata[127:64],
                        chdr_get_has_time(in_chdr_tdata),
                        chdr_calc_payload_length(CHDR_W, in_chdr_tdata),
                        chdr_get_eob(in_chdr_tdata),
                        chdr_get_eov(in_chdr_tdata) };
      in_info_tvalid = in_chdr_tvalid && !in_chdr_tlast && (state == ST_HDR &&
        (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS));
    end else begin
      // When CHDR_W == 64, the flags will be in the first word of the packet,
      // but the timestamp will be in the second word, if there is a timestamp.
      if (state == ST_HDR && in_pkt_type == CHDR_PKT_TYPE_DATA) begin
        // No timestamp in this case
        in_info_tdata = { in_chdr_tdata[63:0], 1'b0,
                          chdr_calc_payload_length(CHDR_W, in_chdr_tdata),
                          chdr_get_eob(in_chdr_tdata), chdr_get_eov(in_chdr_tdata) };
        in_info_tvalid = in_chdr_tvalid && !in_chdr_tlast;
      end else begin
        // Assuming timestamp is present, so use flags from previous clock cycle
        in_info_tdata = { in_chdr_tdata[63:0], 1'b1, chdr_length_reg,
                          chdr_eob_reg, chdr_eov_reg };
        in_info_tvalid = in_chdr_tvalid && !in_chdr_tlast && (state == ST_TS);
      end
    end

    // Back-pressure: header/timestamp words wait for the info FIFO, payload
    // words wait for the payload FIFO, everything else is always accepted.
    case (state)
      ST_HDR  : in_chdr_tready = in_info_tready;
      ST_TS   : in_chdr_tready = in_info_tready;
      ST_MDATA: in_chdr_tready = 1'b1;
      ST_BODY : in_chdr_tready = in_pyld_tready;
      ST_DROP : in_chdr_tready = 1'b1;
      default : in_chdr_tready = 1'b0;
    endcase
  end

  // ---------------------------------------------------
  //  Payload and mdata FIFOs
  // ---------------------------------------------------
  wire [CHDR_W-1:0]      out_pyld_tdata;
  wire [CHDR_KEEP_W-1:0] out_pyld_tkeep;
  wire                   out_pyld_tlast, out_pyld_tvalid, out_pyld_tready;

  wire [INFO_W-1:0]      out_info_tdata;
  wire                   out_info_tvalid, out_info_tready;

  wire [(ITEM_W*NIPC)-1:0] conv_pyld_tdata;
  wire [NIPC-1:0]          conv_pyld_tkeep;
  wire                     conv_pyld_tlast, conv_pyld_tvalid, conv_pyld_tready;


  generate if (SYNC_CLKS) begin : gen_sync_fifo
    axi_fifo #(.WIDTH(INFO_W), .SIZE(INFO_FIFO_SIZE)) info_fifo_i (
      .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0),
      .i_tdata(in_info_tdata),
      .i_tvalid(in_info_tvalid), .i_tready(in_info_tready),
      .o_tdata(out_info_tdata),
      .o_tvalid(out_info_tvalid), .o_tready(out_info_tready),
      .space(), .occupied()
    );
    axi_fifo #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PYLD_FIFO_SIZE)) pyld_fifo_i (
      .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0),
      .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}),
      .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready),
      .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}),
      .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready),
      .space(), .occupied()
    );
  end else begin : gen_async_fifo
    axi_fifo_2clk #(.WIDTH(INFO_W), .SIZE(INFO_FIFO_SIZE)) info_fifo_i (
      .reset(axis_chdr_rst),
      .i_aclk(axis_chdr_clk),
      .i_tdata(in_info_tdata),
      .i_tvalid(in_info_tvalid), .i_tready(in_info_tready),
      .o_aclk(axis_data_clk),
      .o_tdata(out_info_tdata),
      .o_tvalid(out_info_tvalid), .o_tready(out_info_tready)
    );
    axi_fifo_2clk #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PYLD_FIFO_SIZE)) pyld_fifo_i (
      .reset(axis_chdr_rst),
      .i_aclk(axis_chdr_clk),
      .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}),
      .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready),
      .o_aclk(axis_data_clk),
      .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}),
      .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready)
    );
  end endgenerate

  // ---------------------------------------------------
  //  Data Width Converter: CHDR_W => ITEM_W*NIPC
  // ---------------------------------------------------
  generate
    if (CHDR_W != ITEM_W*NIPC) begin : gen_axis_width_conv
      axis_width_conv #(
        .WORD_W(ITEM_W), .IN_WORDS(CHDR_W/ITEM_W), .OUT_WORDS(NIPC),
        .SYNC_CLKS(1), .PIPELINE("NONE")
      ) payload_width_conv_i (
        .s_axis_aclk(axis_data_clk), .s_axis_rst(axis_data_rst),
        .s_axis_tdata(out_pyld_tdata), .s_axis_tkeep(out_pyld_tkeep),
        .s_axis_tlast(out_pyld_tlast), .s_axis_tvalid(out_pyld_tvalid),
        .s_axis_tready(out_pyld_tready),
        .m_axis_aclk(axis_data_clk), .m_axis_rst(axis_data_rst),
        .m_axis_tdata(conv_pyld_tdata), .m_axis_tkeep(conv_pyld_tkeep),
        .m_axis_tlast(conv_pyld_tlast), .m_axis_tvalid(conv_pyld_tvalid),
        .m_axis_tready(conv_pyld_tready)
      );
    end else begin : no_gen_axis_width_conv
      assign conv_pyld_tdata  = out_pyld_tdata;
      assign conv_pyld_tkeep  = out_pyld_tkeep;
      assign conv_pyld_tlast  = out_pyld_tlast;
      assign conv_pyld_tvalid = out_pyld_tvalid;
      assign out_pyld_tready  = conv_pyld_tready;
    end
  endgenerate

  // ---------------------------------------------------
  //  Merge payload and info streams
  // ---------------------------------------------------
  // There should be one info word for each payload packet.
  wire [INFO_W+(ITEM_W+1)*NIPC-1:0] flush_tdata;
  wire                              flush_tlast;
  wire                              flush_tvalid;
  wire                              flush_tready;

  // The info word is presented alongside every payload word of its packet
  // and is popped when the last payload word is transferred.
  assign flush_tdata      = { out_info_tdata, conv_pyld_tkeep, conv_pyld_tdata };
  assign flush_tlast      = conv_pyld_tlast;
  assign flush_tvalid     = conv_pyld_tvalid && out_info_tvalid;
  assign conv_pyld_tready = flush_tready && out_info_tvalid;
  assign out_info_tready  = conv_pyld_tready && conv_pyld_tlast && conv_pyld_tvalid;

  // ---------------------------------------------------
  //  Flushing Logic
  // ---------------------------------------------------
  wire [31:0] flush_timeout_dclk;
  wire        flush_en_dclk;
  wire        flush_active_pyld_cclk;
  wire        flush_done_pyld_cclk;
  wire        flush_active_pyld;
  wire        flush_done_pyld;

  // Flush status: axis_data_clk => axis_chdr_clk
  // (INITIAL_VAL sized to match WIDTH)
  synchronizer #(.WIDTH(2), .INITIAL_VAL(2'd0)) flush_2clk_rb_i (
    .clk(axis_chdr_clk), .rst(1'b0),
    .in({flush_active_pyld, flush_done_pyld}),
    .out({flush_active_pyld_cclk, flush_done_pyld_cclk})
  );
  assign flush_active = flush_active_pyld_cclk;
  assign flush_done = flush_done_pyld_cclk;

  // Flush controls: axis_chdr_clk => axis_data_clk. The FIFO is written and
  // read on every cycle it allows, so the output follows the input settings.
  axi_fifo_2clk #(.WIDTH(33), .SIZE(1)) flush_2clk_ctrl_i (
    .reset(axis_chdr_rst),
    .i_aclk(axis_chdr_clk),
    .i_tdata({flush_en, flush_timeout}), .i_tvalid(1'b1), .i_tready(),
    .o_aclk(axis_data_clk),
    .o_tdata({flush_en_dclk, flush_timeout_dclk}), .o_tvalid(), .o_tready(1'b1)
  );

  axis_packet_flush #(
    .WIDTH(INFO_W+(ITEM_W+1)*NIPC), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT")
  ) pyld_flusher_i (
    .clk(axis_data_clk), .reset(axis_data_rst),
    .enable(flush_en_dclk), .timeout(flush_timeout_dclk),
    .flushing(flush_active_pyld), .done(flush_done_pyld),
    .s_axis_tdata(flush_tdata),
    .s_axis_tlast(flush_tlast),
    .s_axis_tvalid(flush_tvalid),
    .s_axis_tready(flush_tready),
    .m_axis_tdata({m_axis_ttimestamp, m_axis_thas_time, m_axis_tlength,
                   m_axis_teob, m_axis_teov, m_axis_tkeep, m_axis_tdata}),
    .m_axis_tlast(m_axis_tlast),
    .m_axis_tvalid(m_axis_tvalid),
    .m_axis_tready(m_axis_tready)
  );

endmodule
diff
--git a/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data_mdata.v b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data_mdata.v new file mode 100644 index 000000000..90eb5c767 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_data_mdata.v @@ -0,0 +1,538 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_to_axis_data_mdata +// +// Description: +// +// A deframer module for CHDR data packets. It accepts an input CHDR stream, +// and produces two output streams: +// +// 1) Payload data, which includes the payload of the packet, as well as +// timestamp and packet flags presented as sideband information. +// 2) Metadata (mdata), which contains only the metadata of the packet. +// +// This module also performs an optional clock crossing and data width +// conversion from CHDR_W to a user requested width for the payload data bus. +// +// The metadata and data packets are interleaved, i.e., a mdata packet will +// arrive before its corresponding data packet. However, if mdata prefetching +// is enabled, the mdata for the next packet might arrive before the data for +// the current packet has been consumed. In the case of a rate reduction, +// this allows the module to sustain a gapless stream of payload items and a +// bursty sideband mdata path. If there is no metadata in a packet, then an +// empty packet is output on m_axis_mdata_* (i.e., m_axis_mdata_tkeep will be +// set to 0). +// +// Parameters: +// +// - CHDR_W : Width of the input CHDR bus in bits +// - ITEM_W : Width of the output item bus in bits +// - NIPC : The number of output items delivered per cycle +// - SYNC_CLKS : Are the CHDR and data clocks synchronous to each other? +// - MDATA_FIFO_SIZE : FIFO size for the mdata path +// - INFO_FIFO_SIZE : FIFO size for the packet info path +// - PAYLOAD_FIFO_SIZE : FIFO size for the payload path +// - MDATA_PREFETCH_EN : Is mdata prefetching enabled? 
+// +// Signals: +// +// - s_axis_chdr_* : Input CHDR stream (AXI-Stream) +// - m_axis_* : Output payload data stream (AXI-Stream) +// - m_axis_mdata_* : Output mdata stream (AXI-Stream) +// - flush_* : Signals for flush control and status +// + +module chdr_to_axis_data_mdata #( + parameter CHDR_W = 256, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter SYNC_CLKS = 0, + parameter MDATA_FIFO_SIZE = 1, + parameter INFO_FIFO_SIZE = 1, + parameter PAYLOAD_FIFO_SIZE = 1, + parameter MDATA_PREFETCH_EN = 1 +)( + // Clock, reset and settings + input wire axis_chdr_clk, + input wire axis_chdr_rst, + input wire axis_data_clk, + input wire axis_data_rst, + // CHDR in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_chdr_tdata, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // Payload data stream out (AXI-Stream) + output wire [(ITEM_W*NIPC)-1:0] m_axis_tdata, + output wire [NIPC-1:0] m_axis_tkeep, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready, + // Payload sideband information + output wire [63:0] m_axis_ttimestamp, + output wire m_axis_thas_time, + output wire [15:0] m_axis_tlength, + output wire m_axis_teob, + output wire m_axis_teov, + // Metadata stream out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_mdata_tdata, + output wire m_axis_mdata_tlast, + output wire m_axis_mdata_tkeep, + output wire m_axis_mdata_tvalid, + input wire m_axis_mdata_tready, + // Flush signals + input wire flush_en, + input wire [31:0] flush_timeout, + output wire flush_active, + output wire flush_done +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + // --------------------------------------------------- + // Pipeline + // --------------------------------------------------- + localparam CHDR_KEEP_W = CHDR_W/ITEM_W; + + wire [CHDR_W-1:0] 
in_chdr_tdata; + wire [CHDR_KEEP_W-1:0] in_chdr_tkeep; + wire in_chdr_tlast, in_chdr_tvalid; + reg in_chdr_tready; + + axi_fifo_flop2 #(.WIDTH(CHDR_W+1)) in_pipe_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(1'b0), + .i_tdata({s_axis_chdr_tlast, s_axis_chdr_tdata}), + .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready), + .o_tdata({in_chdr_tlast, in_chdr_tdata}), + .o_tvalid(in_chdr_tvalid), .o_tready(in_chdr_tready), + .space(), .occupied() + ); + + chdr_compute_tkeep #(.CHDR_W(CHDR_W), .ITEM_W(ITEM_W)) tkeep_gen_i ( + .clk(axis_chdr_clk), .rst(axis_chdr_rst), + .axis_tdata(in_chdr_tdata), .axis_tlast(in_chdr_tlast), + .axis_tvalid(in_chdr_tvalid), .axis_tready(in_chdr_tready), + .axis_tkeep(in_chdr_tkeep) + ); + + // --------------------------------------------------- + // Input State Machine + // --------------------------------------------------- + localparam INFO_W = 64+1+16+1+1; // timestamp, has_time, length, eob, eov + + wire [CHDR_W-1:0] in_pyld_tdata; + wire [CHDR_KEEP_W-1:0] in_pyld_tkeep; + wire in_pyld_tlast, in_pyld_tvalid, in_pyld_tready; + + reg [INFO_W-1:0] in_info_tdata; + reg in_info_tvalid; + wire in_info_tready; + + wire [CHDR_W-1:0] in_mdata_tdata; + wire in_mdata_tkeep; + wire in_mdata_tlast, in_mdata_tvalid, in_mdata_tready; + + + localparam [2:0] ST_HDR = 3'd0; // Processing the input CHDR header + localparam [2:0] ST_TS = 3'd1; // Processing the input CHDR timestamp + localparam [2:0] ST_MDATA = 3'd2; // Processing the input CHDR metadata word + localparam [2:0] ST_BODY = 3'd3; // Processing the input CHDR payload word + localparam [2:0] ST_DROP = 3'd4; // Something went wrong... 
Dropping packet + + reg [2:0] state = ST_HDR; + reg [4:0] mdata_pending = CHDR_NO_MDATA; + reg last_mdata_line; + + reg [15:0] chdr_length_reg; + reg chdr_eob_reg, chdr_eov_reg; + + // Shortcuts: CHDR header + wire [2:0] in_pkt_type = chdr_get_pkt_type(in_chdr_tdata[63:0]); + wire [4:0] in_num_mdata = chdr_get_num_mdata(in_chdr_tdata[63:0]); + + always @(posedge axis_chdr_clk) begin + if (axis_chdr_rst) begin + state <= ST_HDR; + end else if (in_chdr_tvalid & in_chdr_tready) begin + case (state) + + // ST_HDR: CHDR Header + // ------------------- + ST_HDR: begin + // Always cache the number of metadata words + mdata_pending <= in_num_mdata; + // Figure out the next state + if (!in_chdr_tlast) begin + if (CHDR_W > 64) begin + // When CHDR_W > 64, the timestamp is a part of the header word. + // If this is a data packet (with/without a TS), we move on to the metadata/body + // state otherwise we drop it. Non-data packets should never reach here. + if (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + if (in_num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin + state <= ST_DROP; + end + end else begin + // When CHDR_W == 64, the timestamp comes after the header. Check if this is a data + // packet with a TS to figure out the next state. If no TS, then check for metadata + // to move to the next state. Drop any non-data packets. 
+ chdr_length_reg <= chdr_calc_payload_length(CHDR_W, in_chdr_tdata); + chdr_eob_reg <= chdr_get_eob(in_chdr_tdata); + chdr_eov_reg <= chdr_get_eov(in_chdr_tdata); + if (in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + state <= ST_TS; + end else if (in_pkt_type == CHDR_PKT_TYPE_DATA) begin + if (in_num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin + state <= ST_DROP; + end + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_TS: Timestamp (CHDR_W == 64 only) + // ------------------------------------ + ST_TS: begin + if (!in_chdr_tlast) begin + if (mdata_pending != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_MDATA: Metadata word + // ----------------------- + ST_MDATA: begin + if (!in_chdr_tlast) begin + // Count down metadata and stop at 1 + if (mdata_pending == 5'd1) begin + state <= ST_BODY; + end else begin + mdata_pending <= mdata_pending - 5'd1; + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_BODY: Payload word + // --------------------- + ST_BODY: begin + if (in_chdr_tlast) begin + state <= ST_HDR; + end + end + + // ST_DROP: Drop current packet + // ---------------------------- + ST_DROP: begin + if (in_chdr_tlast) begin + state <= ST_HDR; + end + end + + default: begin + // We should never get here + state <= ST_HDR; + end + endcase + end + end + + // CHDR data goes to the payload stream only in the BODY state. 
+ // Packets are expected to have at least one payload word so the + // CHDR tlast can be used as the payload tlast + assign in_pyld_tdata = in_chdr_tdata; + assign in_pyld_tkeep = in_chdr_tkeep; + assign in_pyld_tlast = in_chdr_tlast; + assign in_pyld_tvalid = in_chdr_tvalid && (state == ST_BODY); + + // Only metadata goes into the mdata FIFO. However, if there is no metadata, + // then we want an empty packet to go into the mdata FIFO. We check the + // packet type because non-data packets will be discarded. + assign in_mdata_tdata = in_chdr_tdata; + assign in_mdata_tlast = in_chdr_tlast || last_mdata_line; + assign in_mdata_tkeep = (state == ST_MDATA); + assign in_mdata_tvalid = in_chdr_tvalid && ( + (state == ST_MDATA) || + (state == ST_HDR && in_num_mdata == CHDR_NO_MDATA && + (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS))); + + always @(*) begin + // Packet timestamp and flags go into the info FIFO, but only if it's a + // data packet since non-data packets will be discarded. + if (CHDR_W > 64) begin + // When CHDR_W > 64, all info will be in the first word of the CHDR packet + in_info_tdata = { in_chdr_tdata[127:64], + chdr_get_has_time(in_chdr_tdata), + chdr_calc_payload_length(CHDR_W, in_chdr_tdata), + chdr_get_eob(in_chdr_tdata), + chdr_get_eov(in_chdr_tdata) }; + in_info_tvalid = in_chdr_tvalid && (state == ST_HDR && + (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS)); + end else begin + // When CHDR_W == 64, the flags will be in the first word of the packet, + // but the timestamp will be in the second word, if there is a timestamp. 
+ if (state == ST_HDR && in_pkt_type == CHDR_PKT_TYPE_DATA) begin + // No timestamp in this case + in_info_tdata = { in_chdr_tdata[63:0], 1'b0, + chdr_calc_payload_length(CHDR_W, in_chdr_tdata), + chdr_get_eob(in_chdr_tdata), chdr_get_eov(in_chdr_tdata) }; + in_info_tvalid = in_chdr_tvalid; + end else begin + // Assuming timestamp is present, so use flags from previous clock cycle + in_info_tdata = { in_chdr_tdata[63:0], 1'b1, chdr_length_reg, + chdr_eob_reg, chdr_eov_reg }; + in_info_tvalid = in_chdr_tvalid && (state == ST_TS); + end + end + + case (state) + ST_HDR: begin + in_chdr_tready = in_info_tready && in_mdata_tready; + last_mdata_line = (in_num_mdata == CHDR_NO_MDATA); + end + ST_TS: begin + in_chdr_tready = in_info_tready && in_mdata_tready; + last_mdata_line = 1'b0; + end + ST_MDATA: begin + in_chdr_tready = in_mdata_tready; + last_mdata_line = (mdata_pending == 5'd1); + end + ST_BODY: begin + in_chdr_tready = in_pyld_tready; + last_mdata_line = 1'b0; + end + ST_DROP: begin + in_chdr_tready = 1'b1; + last_mdata_line = 1'b0; + end + default: begin + in_chdr_tready = 1'b0; + last_mdata_line = 1'b0; + end + endcase + end + + // --------------------------------------------------- + // Payload and mdata FIFOs + // --------------------------------------------------- + wire [CHDR_W-1:0] out_pyld_tdata; + wire [CHDR_KEEP_W-1:0] out_pyld_tkeep; + wire out_pyld_tlast, out_pyld_tvalid, out_pyld_tready; + + wire tmp_mdata_tvalid, tmp_mdata_tready; + wire tmp_info_tready; + + wire [(ITEM_W*NIPC)-1:0] flush_pyld_tdata; + wire [NIPC-1:0] flush_pyld_tkeep; + wire flush_pyld_tlast, flush_pyld_tvalid, flush_pyld_tready; + wire [INFO_W-1:0] flush_info_tdata; + wire [CHDR_W-1:0] flush_mdata_tdata; + wire flush_mdata_tkeep; + wire flush_mdata_tlast, flush_mdata_tvalid, flush_mdata_tready; + + generate if (SYNC_CLKS) begin : gen_sync_fifo + axi_fifo #(.WIDTH(CHDR_W+2), .SIZE(MDATA_FIFO_SIZE)) mdata_fifo_i ( + .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0), + 
.i_tdata({in_mdata_tkeep, in_mdata_tlast, in_mdata_tdata}), + .i_tvalid(in_mdata_tvalid), .i_tready(in_mdata_tready), + .o_tdata({flush_mdata_tkeep, flush_mdata_tlast, flush_mdata_tdata}), + .o_tvalid(tmp_mdata_tvalid), .o_tready(tmp_mdata_tready), + .space(), .occupied() + ); + axi_fifo #(.WIDTH(INFO_W), .SIZE(INFO_FIFO_SIZE)) info_fifo_i ( + .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0), + .i_tdata(in_info_tdata), + .i_tvalid(in_info_tvalid), .i_tready(in_info_tready), + .o_tdata(flush_info_tdata), + .o_tvalid(), .o_tready(tmp_info_tready), + .space(), .occupied() + ); + axi_fifo #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0), + .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), + .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready), + .space(), .occupied() + ); + end else begin : gen_async_fifo + axi_fifo_2clk #(.WIDTH(CHDR_W+2), .SIZE(MDATA_FIFO_SIZE)) mdata_fifo_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({in_mdata_tkeep, in_mdata_tlast, in_mdata_tdata}), + .i_tvalid(in_mdata_tvalid), .i_tready(in_mdata_tready), + .o_aclk(axis_data_clk), + .o_tdata({flush_mdata_tkeep, flush_mdata_tlast, flush_mdata_tdata}), + .o_tvalid(tmp_mdata_tvalid), .o_tready(tmp_mdata_tready) + ); + axi_fifo_2clk #(.WIDTH(INFO_W), .SIZE(INFO_FIFO_SIZE)) info_fifo_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata(in_info_tdata), + .i_tvalid(in_info_tvalid), .i_tready(in_info_tready), + .o_aclk(axis_data_clk), + .o_tdata(flush_info_tdata), + .o_tvalid(), .o_tready(tmp_info_tready) + ); + axi_fifo_2clk #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), 
+ .o_aclk(axis_data_clk), + .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready) + ); + end endgenerate + + // --------------------------------------------------- + // Data Width Converter: CHDR_W => ITEM_W*NIPC + // --------------------------------------------------- + wire tmp_pyld_tvalid, tmp_pyld_tready; + + generate + if (CHDR_W != ITEM_W*NIPC) begin : gen_axis_width_conv + axis_width_conv #( + .WORD_W(ITEM_W), .IN_WORDS(CHDR_W/ITEM_W), .OUT_WORDS(NIPC), + .SYNC_CLKS(1), .PIPELINE("NONE") + ) payload_width_conv_i ( + .s_axis_aclk(axis_data_clk), .s_axis_rst(axis_data_rst), + .s_axis_tdata(out_pyld_tdata), .s_axis_tkeep(out_pyld_tkeep), + .s_axis_tlast(out_pyld_tlast), .s_axis_tvalid(out_pyld_tvalid), + .s_axis_tready(out_pyld_tready), + .m_axis_aclk(axis_data_clk), .m_axis_rst(axis_data_rst), + .m_axis_tdata(flush_pyld_tdata), .m_axis_tkeep(flush_pyld_tkeep), + .m_axis_tlast(flush_pyld_tlast), .m_axis_tvalid(tmp_pyld_tvalid), + .m_axis_tready(tmp_pyld_tready) + ); + end else begin : no_gen_axis_width_conv + assign flush_pyld_tdata = out_pyld_tdata; + assign flush_pyld_tkeep = out_pyld_tkeep; + assign flush_pyld_tlast = out_pyld_tlast; + assign tmp_pyld_tvalid = out_pyld_tvalid; + assign out_pyld_tready = tmp_pyld_tready; + end + endgenerate + + + // --------------------------------------------------- + // Output State Machine + // --------------------------------------------------- + reg [2:0] mdata_pkt_cnt = 3'd0, pyld_pkt_cnt = 3'd0; + + // A payload packet can pass only if it is preceded by a mdata packet + wire pass_pyld = ((mdata_pkt_cnt - pyld_pkt_cnt) > 3'd0); + // A mdata packet has to be blocked if its corresponding payload packet hasn't passed except + // when prefetching is enabled. In that case one additional mdata packet is allowed to pass + wire pass_mdata = ((mdata_pkt_cnt - pyld_pkt_cnt) < (MDATA_PREFETCH_EN == 1 ? 
3'd2 : 3'd1)); + + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + mdata_pkt_cnt <= 3'd0; + pyld_pkt_cnt <= 3'd0; + end else begin + if (flush_mdata_tvalid && flush_mdata_tready && flush_mdata_tlast) + mdata_pkt_cnt <= mdata_pkt_cnt + 3'd1; + if (flush_pyld_tvalid && flush_pyld_tready && flush_pyld_tlast) + pyld_pkt_cnt <= pyld_pkt_cnt + 3'd1; + end + end + + assign flush_pyld_tvalid = tmp_pyld_tvalid && pass_pyld; + assign tmp_pyld_tready = flush_pyld_tready && pass_pyld; + + // Only read the info FIFO once per packet + assign tmp_info_tready = tmp_pyld_tready && flush_pyld_tlast && tmp_pyld_tvalid; + + assign flush_mdata_tvalid = tmp_mdata_tvalid && pass_mdata; + assign tmp_mdata_tready = flush_mdata_tready && pass_mdata; + + // --------------------------------------------------- + // Flushing Logic + // --------------------------------------------------- + wire [31:0] flush_timeout_dclk; + wire flush_en_dclk; + wire flush_active_pyld_cclk, flush_active_mdata_cclk; + wire flush_done_pyld_cclk, flush_done_mdata_cclk; + wire flush_active_pyld, flush_active_mdata; + wire flush_done_pyld, flush_done_mdata; + + synchronizer #(.WIDTH(4), .INITIAL_VAL(4'd0)) flush_2clk_rb_i ( + .clk(axis_chdr_clk), .rst(1'b0), + .in({flush_active_pyld, flush_done_pyld, + flush_active_mdata, flush_done_mdata}), + .out({flush_active_pyld_cclk, flush_done_pyld_cclk, + flush_active_mdata_cclk, flush_done_mdata_cclk}) + ); + assign flush_active = flush_active_pyld_cclk | flush_active_mdata_cclk; + assign flush_done = flush_done_pyld_cclk & flush_done_mdata_cclk; + + axi_fifo_2clk #(.WIDTH(33), .SIZE(1)) flush_2clk_ctrl_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({flush_en, flush_timeout}), .i_tvalid(1'b1), .i_tready(), + .o_aclk(axis_data_clk), + .o_tdata({flush_en_dclk, flush_timeout_dclk}), .o_tvalid(), .o_tready(1'b1) + ); + + axis_packet_flush #( + .WIDTH(INFO_W+(ITEM_W+1)*NIPC), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT") + ) 
pyld_flusher_i ( + .clk(axis_data_clk), .reset(axis_data_rst), + .enable(flush_en_dclk), .timeout(flush_timeout_dclk), + .flushing(flush_active_pyld), .done(flush_done_pyld), + .s_axis_tdata({flush_info_tdata, flush_pyld_tkeep, flush_pyld_tdata}), + .s_axis_tlast(flush_pyld_tlast), + .s_axis_tvalid(flush_pyld_tvalid), + .s_axis_tready(flush_pyld_tready), + .m_axis_tdata({m_axis_ttimestamp, m_axis_thas_time, m_axis_tlength, + m_axis_teob, m_axis_teov, m_axis_tkeep, m_axis_tdata}), + .m_axis_tlast(m_axis_tlast), + .m_axis_tvalid(m_axis_tvalid), + .m_axis_tready(m_axis_tready) + ); + + axis_packet_flush #( + .WIDTH(CHDR_W+1), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT") + ) mdata_flusher_i ( + .clk(axis_data_clk), .reset(axis_data_rst), + .enable(flush_en_dclk), .timeout(flush_timeout_dclk), + .flushing(flush_active_mdata), .done(flush_done_mdata), + .s_axis_tdata({flush_mdata_tkeep, flush_mdata_tdata}), + .s_axis_tlast(flush_mdata_tlast), + .s_axis_tvalid(flush_mdata_tvalid), + .s_axis_tready(flush_mdata_tready), + .m_axis_tdata({m_axis_mdata_tkeep, m_axis_mdata_tdata}), + .m_axis_tlast(m_axis_mdata_tlast), + .m_axis_tvalid(m_axis_mdata_tvalid), + .m_axis_tready(m_axis_mdata_tready) + ); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_pyld_ctxt.v b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_pyld_ctxt.v new file mode 100644 index 000000000..f604584e8 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_to_axis_pyld_ctxt.v @@ -0,0 +1,458 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_to_axis_pyld_ctxt +// Description: +// A header deframer module for CHDR data packets. +// Accepts an input CHDR stream, and produces two output streams: +// 1) Payload, which contains the payload of the packet +// 2) Context, which contains the header info in the packet i.e. 
+// CHDR header, timestamp and metadata (marked with a tuser) +// This module also performs an optional clock crossing and data +// width conversion from CHDR_W to a user-requested width for the +// payload bus. +// Context and data packets are interleaved, i.e., a context packet +// will arrive before its corresponding data packet. However, if +// context prefetching is enabled, the context for the next packet +// might arrive before the data for the current packet has been +// consumed. In the case of a rate reduction, this allows the module +// to sustain a gapless stream of payload items and a bursty +// sideband context path. +// +// Parameters: +// - CHDR_W: Width of the input CHDR bus in bits +// - ITEM_W: Width of the output item bus in bits +// - NIPC: The number of output items delivered per cycle +// - SYNC_CLKS: Are the CHDR and data clocks synchronous to each other? +// - CONTEXT_FIFO_SIZE: FIFO size for the context path +// - PAYLOAD_FIFO_SIZE: FIFO size for the payload path +// - CONTEXT_PREFETCH_EN: Is context prefetching enabled?
+// +// Signals: +// - s_axis_chdr_* : Input CHDR stream (AXI-Stream) +// - m_axis_payload_* : Output payload stream (AXI-Stream) +// - m_axis_context_* : Output context stream (AXI-Stream) +// - flush_* : Signals for flush control and status +// + +module chdr_to_axis_pyld_ctxt #( + parameter CHDR_W = 256, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter SYNC_CLKS = 0, + parameter CONTEXT_FIFO_SIZE = 1, + parameter PAYLOAD_FIFO_SIZE = 1, + parameter CONTEXT_PREFETCH_EN = 1 +)( + // Clocks and resets for the CHDR (input) domain and the data (output) domain + input wire axis_chdr_clk, + input wire axis_chdr_rst, + input wire axis_data_clk, + input wire axis_data_rst, + // CHDR in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_chdr_tdata, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // Payload stream out (AXI-Stream) + output wire [(ITEM_W*NIPC)-1:0] m_axis_payload_tdata, + output wire [NIPC-1:0] m_axis_payload_tkeep, + output wire m_axis_payload_tlast, + output wire m_axis_payload_tvalid, + input wire m_axis_payload_tready, + // Context stream out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_context_tdata, + output wire [3:0] m_axis_context_tuser, + output wire m_axis_context_tlast, + output wire m_axis_context_tvalid, + input wire m_axis_context_tready, + // Flush signals (control is taken in on axis_chdr_clk; status is synchronized back to axis_chdr_clk) + input wire flush_en, + input wire [31:0] flush_timeout, + output wire flush_active, + output wire flush_done +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + // --------------------------------------------------- + // Pipeline: input stage on the CHDR stream plus tkeep generation + // --------------------------------------------------- + localparam CHDR_KEEP_W = CHDR_W/ITEM_W; // Number of ITEM_W-bit items per CHDR word (width of the tkeep buses) + + wire [CHDR_W-1:0] in_chdr_tdata; + wire [CHDR_KEEP_W-1:0] in_chdr_tkeep; + wire in_chdr_tlast, in_chdr_tvalid; + reg in_chdr_tready; // Driven combinationally by the input state machine decode + + axi_fifo_flop2 #(.WIDTH(CHDR_W+1)) in_pipe_i ( + 
.clk(axis_chdr_clk), .reset(axis_chdr_rst), .clear(1'b0), + .i_tdata({s_axis_chdr_tlast, s_axis_chdr_tdata}), + .i_tvalid(s_axis_chdr_tvalid), .i_tready(s_axis_chdr_tready), + .o_tdata({in_chdr_tlast, in_chdr_tdata}), + .o_tvalid(in_chdr_tvalid), .o_tready(in_chdr_tready), + .space(), .occupied() + ); + + chdr_compute_tkeep #(.CHDR_W(CHDR_W), .ITEM_W(ITEM_W)) tkeep_gen_i ( + .clk(axis_chdr_clk), .rst(axis_chdr_rst), + .axis_tdata(in_chdr_tdata), .axis_tlast(in_chdr_tlast), + .axis_tvalid(in_chdr_tvalid), .axis_tready(in_chdr_tready), + .axis_tkeep(in_chdr_tkeep) + ); + + // --------------------------------------------------- + // Input State Machine (clocked by axis_chdr_clk) + // --------------------------------------------------- + wire [CHDR_W-1:0] in_pyld_tdata; + wire [CHDR_KEEP_W-1:0] in_pyld_tkeep; + wire in_pyld_tlast, in_pyld_tvalid, in_pyld_tready; + + wire [CHDR_W-1:0] in_ctxt_tdata; + reg [3:0] in_ctxt_tuser; // Context field tag for the current word (CONTEXT_FIELD_*), driven combinationally + wire in_ctxt_tlast, in_ctxt_tvalid, in_ctxt_tready; + + + localparam [2:0] ST_HDR = 3'd0; // Processing the input CHDR header + localparam [2:0] ST_TS = 3'd1; // Processing the input CHDR timestamp + localparam [2:0] ST_MDATA = 3'd2; // Processing the input CHDR metadata word + localparam [2:0] ST_BODY = 3'd3; // Processing the input CHDR payload word + localparam [2:0] ST_DROP = 3'd4; // Something went wrong...
Dropping packet + + reg [2:0] state = ST_HDR; + reg [4:0] mdata_pending = CHDR_NO_MDATA; // Metadata words still expected: loaded in ST_HDR, counted down in ST_MDATA + reg last_ctxt_line; // Combinational flag: current word is the final word of the context packet + + // Shortcuts: CHDR header fields decoded from bits [63:0] of the current word (only read while state == ST_HDR) + wire [2:0] in_pkt_type = chdr_get_pkt_type(in_chdr_tdata[63:0]); + wire [4:0] in_num_mdata = chdr_get_num_mdata(in_chdr_tdata[63:0]); + // The state only advances when a CHDR word is transferred (tvalid & tready); reset returns to ST_HDR + always @(posedge axis_chdr_clk) begin + if (axis_chdr_rst) begin + state <= ST_HDR; + end else if (in_chdr_tvalid & in_chdr_tready) begin + case (state) + + // ST_HDR: CHDR Header + // ------------------- + ST_HDR: begin + // Always cache the number of metadata words + mdata_pending <= in_num_mdata; + // Figure out the next state + if (!in_chdr_tlast) begin + if (CHDR_W > 64) begin + // When CHDR_W > 64, the timestamp is a part of the header word. + // If this is a data packet (with/without a TS), we move on to the metadata/body + // state otherwise we drop it. Non-data packets should never reach here. + if (in_pkt_type == CHDR_PKT_TYPE_DATA || in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + if (in_num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin + state <= ST_DROP; + end + end else begin + // When CHDR_W == 64, the timestamp comes after the header. Check if this is a data + // packet with a TS to figure out the next state. If no TS, then check for metadata + // to move to the next state. Drop any non-data packets.
+ if (in_pkt_type == CHDR_PKT_TYPE_DATA_TS) begin + state <= ST_TS; + end else if (in_pkt_type == CHDR_PKT_TYPE_DATA) begin + if (in_num_mdata != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin + state <= ST_DROP; + end + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_TS: Timestamp (CHDR_W == 64 only) + // ------------------------------------ + ST_TS: begin + if (!in_chdr_tlast) begin + if (mdata_pending != CHDR_NO_MDATA) begin + state <= ST_MDATA; + end else begin + state <= ST_BODY; + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_MDATA: Metadata word + // ----------------------- + ST_MDATA: begin + if (!in_chdr_tlast) begin + // Count down metadata and stop at 1 + if (mdata_pending == 5'd1) begin + state <= ST_BODY; + end else begin + mdata_pending <= mdata_pending - 5'd1; + end + end else begin // Premature termination + // Packets must have at least one payload line + state <= ST_HDR; + end + end + + // ST_BODY: Payload word + // --------------------- + ST_BODY: begin + if (in_chdr_tlast) begin + state <= ST_HDR; + end + end + + // ST_DROP: Drop current packet + // ---------------------------- + ST_DROP: begin + if (in_chdr_tlast) begin + state <= ST_HDR; + end + end + + default: begin + // We should never get here + state <= ST_HDR; + end + endcase + end + end + + // CHDR data goes to the payload stream only in the BODY state. + // Packets are expected to have at least one payload word so the + // CHDR tlast can be used as the payload tlast + assign in_pyld_tdata = in_chdr_tdata; + assign in_pyld_tkeep = in_chdr_tkeep; + assign in_pyld_tlast = in_chdr_tlast; + assign in_pyld_tvalid = in_chdr_tvalid && (state == ST_BODY); + + // CHDR data goes to the context stream in the HDR, TS and MDATA states.
+ // tlast has to be recomputed for the context stream, however, we + // still need to correctly handle an errant packet without a payload + assign in_ctxt_tdata = in_chdr_tdata; + assign in_ctxt_tlast = in_chdr_tlast || last_ctxt_line; + assign in_ctxt_tvalid = in_chdr_tvalid && (state != ST_BODY && state != ST_DROP); + // Combinational per-state decode: blocking assignments only, every output assigned in every branch + always @(*) begin + case (state) + ST_HDR: begin + // The header goes to the context stream; it ends the context packet when no metadata follows and no separate timestamp word is expected + in_chdr_tready = in_ctxt_tready; + in_ctxt_tuser = (CHDR_W > 64) ? CONTEXT_FIELD_HDR_TS : CONTEXT_FIELD_HDR; + last_ctxt_line = (in_num_mdata == CHDR_NO_MDATA) && ( + in_pkt_type == CHDR_PKT_TYPE_DATA || + (in_pkt_type == CHDR_PKT_TYPE_DATA_TS && CHDR_W > 64)); + end + ST_TS: begin + // The timestamp goes to the context stream; it is the last context word when no metadata is pending + in_chdr_tready = in_ctxt_tready; + in_ctxt_tuser = CONTEXT_FIELD_TS; + last_ctxt_line = (mdata_pending == CHDR_NO_MDATA); + end + ST_MDATA: begin + // The metadata goes to the context stream; the last word is reached when mdata_pending == 1 + in_chdr_tready = in_ctxt_tready; + in_ctxt_tuser = CONTEXT_FIELD_MDATA; + last_ctxt_line = (mdata_pending == 5'd1); + end + ST_BODY: begin + // The body goes to the payload stream + in_chdr_tready = in_pyld_tready; + in_ctxt_tuser = 4'h0; + last_ctxt_line = 1'b0; + end + ST_DROP: begin + // Errant packets get dropped + in_chdr_tready = 1'b1; + in_ctxt_tuser = 4'h0; + last_ctxt_line = 1'b0; + end + default: begin + in_chdr_tready = 1'b0; + in_ctxt_tuser = 4'h0; + last_ctxt_line = 1'b0; + end + endcase + end + + // --------------------------------------------------- + // Payload and Context FIFOs + // --------------------------------------------------- + wire [CHDR_W-1:0] out_pyld_tdata; + wire [CHDR_KEEP_W-1:0] out_pyld_tkeep; + wire out_pyld_tlast, out_pyld_tvalid, out_pyld_tready; + + wire tmp_ctxt_tvalid, tmp_ctxt_tready; + + wire [(ITEM_W*NIPC)-1:0] flush_pyld_tdata; + wire [NIPC-1:0] flush_pyld_tkeep; + wire flush_pyld_tlast, flush_pyld_tvalid, flush_pyld_tready; + wire [CHDR_W-1:0] flush_ctxt_tdata; + wire [3:0]
flush_ctxt_tuser; + wire flush_ctxt_tlast, flush_ctxt_tvalid, flush_ctxt_tready; + // Context words are stored as {tlast, tuser, tdata}; payload words as {tlast, tkeep, tdata} + generate if (SYNC_CLKS) begin : gen_sync_fifo // Single-clock FIFOs on axis_data_clk; NOTE(review): presumably requires axis_chdr_clk to be the same clock -- confirm + axi_fifo #(.WIDTH(CHDR_W+4+1), .SIZE(CONTEXT_FIFO_SIZE)) ctxt_fifo_i ( + .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0), + .i_tdata({in_ctxt_tlast, in_ctxt_tuser, in_ctxt_tdata}), + .i_tvalid(in_ctxt_tvalid), .i_tready(in_ctxt_tready), + .o_tdata({flush_ctxt_tlast, flush_ctxt_tuser, flush_ctxt_tdata}), + .o_tvalid(tmp_ctxt_tvalid), .o_tready(tmp_ctxt_tready), + .space(), .occupied() + ); + axi_fifo #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .clk(axis_data_clk), .reset(axis_data_rst), .clear(1'b0), + .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), + .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready), + .space(), .occupied() + ); + end else begin : gen_async_fifo // Dual-clock FIFOs: written on axis_chdr_clk, read on axis_data_clk + axi_fifo_2clk #(.WIDTH(CHDR_W+4+1), .SIZE(CONTEXT_FIFO_SIZE)) ctxt_fifo_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({in_ctxt_tlast, in_ctxt_tuser, in_ctxt_tdata}), + .i_tvalid(in_ctxt_tvalid), .i_tready(in_ctxt_tready), + .o_aclk(axis_data_clk), + .o_tdata({flush_ctxt_tlast, flush_ctxt_tuser, flush_ctxt_tdata}), + .o_tvalid(tmp_ctxt_tvalid), .o_tready(tmp_ctxt_tready) + ); + axi_fifo_2clk #(.WIDTH(CHDR_W+CHDR_KEEP_W+1), .SIZE(PAYLOAD_FIFO_SIZE)) pyld_fifo_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({in_pyld_tlast, in_pyld_tkeep, in_pyld_tdata}), + .i_tvalid(in_pyld_tvalid), .i_tready(in_pyld_tready), + .o_aclk(axis_data_clk), + .o_tdata({out_pyld_tlast, out_pyld_tkeep, out_pyld_tdata}), + .o_tvalid(out_pyld_tvalid), .o_tready(out_pyld_tready) + ); + end endgenerate + + // --------------------------------------------------- + // Data Width Converter: CHDR_W => ITEM_W*NIPC + // --------------------------------------------------- + wire tmp_pyld_tvalid,
tmp_pyld_tready; + + axis_width_conv #( + .WORD_W(ITEM_W), .IN_WORDS(CHDR_W/ITEM_W), .OUT_WORDS(NIPC), + .SYNC_CLKS(1), .PIPELINE("NONE") + ) payload_width_conv_i ( + .s_axis_aclk(axis_data_clk), .s_axis_rst(axis_data_rst), + .s_axis_tdata(out_pyld_tdata), .s_axis_tkeep(out_pyld_tkeep), + .s_axis_tlast(out_pyld_tlast), .s_axis_tvalid(out_pyld_tvalid), + .s_axis_tready(out_pyld_tready), + .m_axis_aclk(axis_data_clk), .m_axis_rst(axis_data_rst), + .m_axis_tdata(flush_pyld_tdata), .m_axis_tkeep(flush_pyld_tkeep), + .m_axis_tlast(flush_pyld_tlast), .m_axis_tvalid(tmp_pyld_tvalid), + .m_axis_tready(tmp_pyld_tready) + ); + + // --------------------------------------------------- + // Output State Machine (clocked by axis_data_clk) + // --------------------------------------------------- + reg [2:0] ctxt_pkt_cnt = 3'd0, pyld_pkt_cnt = 3'd0; // 3-bit wrapping counts of context/payload packets passed; only their difference is used + + // A payload packet can pass only if it is preceded by a context packet + wire pass_pyld = ((ctxt_pkt_cnt - pyld_pkt_cnt) > 3'd0); + // A context packet has to be blocked if its corresponding payload packet hasn't passed except + // when prefetching is enabled. In that case one additional context packet is allowed to pass + wire pass_ctxt = ((ctxt_pkt_cnt - pyld_pkt_cnt) < (CONTEXT_PREFETCH_EN == 1 ?
3'd2 : 3'd1)); + // Each counter increments when the last beat (tlast) of a packet is accepted on its gated stream + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + ctxt_pkt_cnt <= 3'd0; + pyld_pkt_cnt <= 3'd0; + end else begin + if (flush_ctxt_tvalid && flush_ctxt_tready && flush_ctxt_tlast) + ctxt_pkt_cnt <= ctxt_pkt_cnt + 3'd1; + if (flush_pyld_tvalid && flush_pyld_tready && flush_pyld_tlast) + pyld_pkt_cnt <= pyld_pkt_cnt + 3'd1; + end + end + + assign flush_pyld_tvalid = tmp_pyld_tvalid && pass_pyld; + assign tmp_pyld_tready = flush_pyld_tready && pass_pyld; + + assign flush_ctxt_tvalid = tmp_ctxt_tvalid && pass_ctxt; + assign tmp_ctxt_tready = flush_ctxt_tready && pass_ctxt; + + // --------------------------------------------------- + // Flushing Logic + // --------------------------------------------------- + wire [31:0] flush_timeout_dclk; + wire flush_en_dclk; + wire flush_active_pyld_cclk, flush_active_ctxt_cclk; + wire flush_done_pyld_cclk, flush_done_ctxt_cclk; + wire flush_active_pyld, flush_active_ctxt; + wire flush_done_pyld, flush_done_ctxt; + // Flusher status (generated on axis_data_clk) is synchronized to axis_chdr_clk to drive flush_active/flush_done + synchronizer #(.WIDTH(4), .INITIAL_VAL(4'd0)) flush_2clk_rb_i ( + .clk(axis_chdr_clk), .rst(1'b0), + .in({flush_active_pyld, flush_done_pyld, + flush_active_ctxt, flush_done_ctxt}), + .out({flush_active_pyld_cclk, flush_done_pyld_cclk, + flush_active_ctxt_cclk, flush_done_ctxt_cclk}) + ); + assign flush_active = flush_active_pyld_cclk | flush_active_ctxt_cclk; + assign flush_done = flush_done_pyld_cclk & flush_done_ctxt_cclk; + // flush_en/flush_timeout are offered on every axis_chdr_clk cycle (i_tvalid tied high) and drained on every axis_data_clk cycle (o_tready tied high) + axi_fifo_2clk #(.WIDTH(33), .SIZE(1)) flush_2clk_ctrl_i ( + .reset(axis_chdr_rst), + .i_aclk(axis_chdr_clk), + .i_tdata({flush_en, flush_timeout}), .i_tvalid(1'b1), .i_tready(), + .o_aclk(axis_data_clk), + .o_tdata({flush_en_dclk, flush_timeout_dclk}), .o_tvalid(), .o_tready(1'b1) + ); + + axis_packet_flush #( + .WIDTH((ITEM_W+1)*NIPC), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT") + ) pyld_flusher_i ( + .clk(axis_data_clk), .reset(axis_data_rst), + .enable(flush_en_dclk), .timeout(flush_timeout_dclk), + .flushing(flush_active_pyld),
.done(flush_done_pyld), + .s_axis_tdata({flush_pyld_tkeep, flush_pyld_tdata}), + .s_axis_tlast(flush_pyld_tlast), + .s_axis_tvalid(flush_pyld_tvalid), + .s_axis_tready(flush_pyld_tready), + .m_axis_tdata({m_axis_payload_tkeep, m_axis_payload_tdata}), + .m_axis_tlast(m_axis_payload_tlast), + .m_axis_tvalid(m_axis_payload_tvalid), + .m_axis_tready(m_axis_payload_tready) + ); + + axis_packet_flush #( + .WIDTH(CHDR_W+4), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT") + ) ctxt_flusher_i ( + .clk(axis_data_clk), .reset(axis_data_rst), + .enable(flush_en_dclk), .timeout(flush_timeout_dclk), + .flushing(flush_active_ctxt), .done(flush_done_ctxt), + .s_axis_tdata({flush_ctxt_tuser, flush_ctxt_tdata}), + .s_axis_tlast(flush_ctxt_tlast), + .s_axis_tvalid(flush_ctxt_tvalid), + .s_axis_tready(flush_ctxt_tready), + .m_axis_tdata({m_axis_context_tuser, m_axis_context_tdata}), + .m_axis_tlast(m_axis_context_tlast), + .m_axis_tvalid(m_axis_context_tvalid), + .m_axis_tready(m_axis_context_tready) + ); + +endmodule // chdr_to_axis_pyld_ctxt diff --git a/fpga/usrp3/lib/rfnoc/core/chdr_to_chdr_data.v b/fpga/usrp3/lib/rfnoc/core/chdr_to_chdr_data.v new file mode 100644 index 000000000..390d77bca --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/chdr_to_chdr_data.v @@ -0,0 +1,55 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_to_chdr_data +// Description: +// A simple adapter for when CHDR data is requested as an +// interface to user logic. The stream passes through an axis_packet_flush instance.
+// +// Parameters: +// - CHDR_W: Width of the input and output CHDR buses in bits +// +// Signals: +// - s_axis_chdr_* : Input CHDR stream (AXI-Stream) +// - m_axis_chdr_* : Output CHDR stream (AXI-Stream) +// - flush_* : Signals for flush control and status +// + +module chdr_to_chdr_data #( + parameter CHDR_W = 256 +)( + // Clock, reset and settings + input wire axis_chdr_clk, + input wire axis_chdr_rst, + // CHDR in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_chdr_tdata, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // CHDR out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_chdr_tdata, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Flush signals (all in the axis_chdr_clk domain; wired directly to the flusher) + input wire flush_en, + input wire [31:0] flush_timeout, + output wire flush_active, + output wire flush_done +); + + axis_packet_flush #( + .WIDTH(CHDR_W), .FLUSH_PARTIAL_PKTS(0), .TIMEOUT_W(32), .PIPELINE("OUT") + ) chdr_flusher_i ( + .clk(axis_chdr_clk), .reset(axis_chdr_rst), + .enable(flush_en), .timeout(flush_timeout), + .flushing(flush_active), .done(flush_done), + .s_axis_tdata(s_axis_chdr_tdata), .s_axis_tlast(s_axis_chdr_tlast), + .s_axis_tvalid(s_axis_chdr_tvalid), .s_axis_tready(s_axis_chdr_tready), + .m_axis_tdata(m_axis_chdr_tdata), .m_axis_tlast(m_axis_chdr_tlast), + .m_axis_tvalid(m_axis_chdr_tvalid), .m_axis_tready(m_axis_chdr_tready) + ); + +endmodule // chdr_to_chdr_data diff --git a/fpga/usrp3/lib/rfnoc/core/ctrlport.vh b/fpga/usrp3/lib/rfnoc/core/ctrlport.vh new file mode 100644 index 000000000..7b5f9fcaa --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/ctrlport.vh @@ -0,0 +1,26 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport.vh +// Description: +// Defines constants for the control port interface. +// +// Requires rfnoc_axis_ctrl_utils.vh in same directory to be +// included first.
+ +//--------------------------------------------------------------- +// Signal widths +//--------------------------------------------------------------- +localparam CTRLPORT_ADDR_W = 20; +localparam CTRLPORT_DATA_W = 32; +localparam CTRLPORT_STS_W = 2; + +//--------------------------------------------------------------- +// Status values +//--------------------------------------------------------------- +localparam [1:0] CTRL_STS_OKAY = 2'b00; +localparam [1:0] CTRL_STS_CMDERR = 2'b01; +localparam [1:0] CTRL_STS_TSERR = 2'b10; +localparam [1:0] CTRL_STS_WARNING = 2'b11; diff --git a/fpga/usrp3/lib/rfnoc/core/ctrlport_endpoint.v b/fpga/usrp3/lib/rfnoc/core/ctrlport_endpoint.v new file mode 100644 index 000000000..4a7d7302a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/ctrlport_endpoint.v @@ -0,0 +1,284 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport_endpoint +// Description: +// A bidirectional AXIS-Control to Control-Port converter. +// Use this module in noc_shell to interface between the user +// logic (using ctrlport) and the rfnoc infrastructure (axis_ctrl) +// +// Parameters: +// - THIS_PORTID: The 10-bit ID of the control XB port that is +// connected to this converter. +// - SYNC_CLKS: Is rfnoc_ctrl_clk and ctrlport_clk the same clock? 
+// - AXIS_CTRL_MST_EN: Enable an AXIS-Ctrl master +// - AXIS_CTRL_SLV_EN: Enable an AXIS-Ctrl slave +// - SLAVE_FIFO_SIZE: FIFO depth for the slave port +// +// Signals: +// - *_rfnoc_ctrl_* : Input/output AXIS-Control stream (AXI-Stream) +// - *_ctrlport_* : Input/output control-port bus + +module ctrlport_endpoint #( + parameter [9:0] THIS_PORTID = 10'd0, + parameter SYNC_CLKS = 0, + parameter [0:0] AXIS_CTRL_MST_EN = 1, + parameter [0:0] AXIS_CTRL_SLV_EN = 1, + parameter SLAVE_FIFO_SIZE = 5 +)( + // Clocks, Resets, Misc + input wire rfnoc_ctrl_clk, + input wire rfnoc_ctrl_rst, + input wire ctrlport_clk, + input wire ctrlport_rst, + // AXIS-Control Bus + input wire [31:0] s_rfnoc_ctrl_tdata, + input wire s_rfnoc_ctrl_tlast, + input wire s_rfnoc_ctrl_tvalid, + output wire s_rfnoc_ctrl_tready, + output wire [31:0] m_rfnoc_ctrl_tdata, + output wire m_rfnoc_ctrl_tlast, + output wire m_rfnoc_ctrl_tvalid, + input wire m_rfnoc_ctrl_tready, + // Control Port Master (Request) + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [31:0] m_ctrlport_req_data, + output wire [3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + // Control Port Master (Response) + input wire m_ctrlport_resp_ack, + input wire [1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data, + // Control Port Slave (Request) + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + // Control Port Slave (Response) + output wire s_ctrlport_resp_ack, + output wire [1:0] s_ctrlport_resp_status, + output wire [31:0] 
s_ctrlport_resp_data +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "rfnoc_chdr_utils.vh" + `include "rfnoc_axis_ctrl_utils.vh" + + // --------------------------------------------------- + // Clock Crossing + // --------------------------------------------------- + + wire [31:0] i_ctrl_tdata, o_ctrl_tdata; + wire i_ctrl_tlast, o_ctrl_tlast; + wire i_ctrl_tvalid, o_ctrl_tvalid; + wire i_ctrl_tready, o_ctrl_tready; + + generate + if (SYNC_CLKS) begin : gen_sync_fifos + axi_fifo #(.WIDTH(32+1), .SIZE(1)) in_fifo_i ( + .clk(ctrlport_clk), .reset(ctrlport_rst), .clear(1'b0), + .i_tdata({s_rfnoc_ctrl_tlast, s_rfnoc_ctrl_tdata}), + .i_tvalid(s_rfnoc_ctrl_tvalid), .i_tready(s_rfnoc_ctrl_tready), + .o_tdata({i_ctrl_tlast, i_ctrl_tdata}), + .o_tvalid(i_ctrl_tvalid), .o_tready(i_ctrl_tready), + .space(), .occupied() + ); + + axi_fifo #(.WIDTH(32+1), .SIZE(1)) out_fifo_i ( + .clk(ctrlport_clk), .reset(ctrlport_rst), .clear(1'b0), + .i_tdata({o_ctrl_tlast, o_ctrl_tdata}), + .i_tvalid(o_ctrl_tvalid), .i_tready(o_ctrl_tready), + .o_tdata({m_rfnoc_ctrl_tlast, m_rfnoc_ctrl_tdata}), + .o_tvalid(m_rfnoc_ctrl_tvalid), .o_tready(m_rfnoc_ctrl_tready), + .space(), .occupied() + ); + end else begin : gen_async_fifos + axi_fifo_2clk #(.WIDTH(32+1), .SIZE(1), .PIPELINE("IN")) in_fifo_i ( + .reset(rfnoc_ctrl_rst), + .i_aclk(rfnoc_ctrl_clk), + .i_tdata({s_rfnoc_ctrl_tlast, s_rfnoc_ctrl_tdata}), + .i_tvalid(s_rfnoc_ctrl_tvalid), .i_tready(s_rfnoc_ctrl_tready), + .o_aclk(ctrlport_clk), + .o_tdata({i_ctrl_tlast, i_ctrl_tdata}), + .o_tvalid(i_ctrl_tvalid), .o_tready(i_ctrl_tready) + ); + + axi_fifo_2clk #(.WIDTH(32+1), .SIZE(1), .PIPELINE("OUT")) out_fifo_i ( + .reset(ctrlport_rst), + .i_aclk(ctrlport_clk), + .i_tdata({o_ctrl_tlast, o_ctrl_tdata}), + .i_tvalid(o_ctrl_tvalid), .i_tready(o_ctrl_tready), + .o_aclk(rfnoc_ctrl_clk), + .o_tdata({m_rfnoc_ctrl_tlast, m_rfnoc_ctrl_tdata}), + 
.o_tvalid(m_rfnoc_ctrl_tvalid), .o_tready(m_rfnoc_ctrl_tready) + ); + + end + endgenerate + + // --------------------------------------------------- + // MUXING + // --------------------------------------------------- + wire [31:0] mst_req_tdata, mst_resp_tdata ; + wire mst_req_tlast, mst_resp_tlast ; + wire mst_req_tvalid, mst_resp_tvalid; + wire mst_req_tready, mst_resp_tready; + + wire [31:0] slv_req_tdata, slv_req_fifo_tdata, slv_resp_tdata ; + wire slv_req_tlast, slv_req_fifo_tlast, slv_resp_tlast ; + wire slv_req_tvalid, slv_req_fifo_tvalid, slv_resp_tvalid; + wire slv_req_tready, slv_req_fifo_tready, slv_resp_tready; + + generate + if (AXIS_CTRL_MST_EN == 1'b1 && AXIS_CTRL_SLV_EN == 1'b1) begin : gen_mst_slv_muxing + wire [31:0] in_hdr; + axi_demux #( + .WIDTH(32), .SIZE(2), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(0) + ) demux_i ( + .clk(ctrlport_clk), .reset(ctrlport_rst), .clear(1'b0), + .header(in_hdr), .dest(axis_ctrl_get_is_ack(in_hdr)), + .i_tdata (i_ctrl_tdata ), + .i_tlast (i_ctrl_tlast ), + .i_tvalid(i_ctrl_tvalid), + .i_tready(i_ctrl_tready), + .o_tdata ({mst_resp_tdata, slv_req_tdata }), + .o_tlast ({mst_resp_tlast, slv_req_tlast }), + .o_tvalid({mst_resp_tvalid, slv_req_tvalid}), + .o_tready({mst_resp_tready, slv_req_tready}) + ); + + axi_mux #( + .WIDTH(32), .SIZE(2), .PRIO(0), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(0) + ) mux_i ( + .clk(ctrlport_clk), .reset(ctrlport_rst), .clear(1'b0), + .i_tdata ({mst_req_tdata, slv_resp_tdata }), + .i_tlast ({mst_req_tlast, slv_resp_tlast }), + .i_tvalid({mst_req_tvalid, slv_resp_tvalid}), + .i_tready({mst_req_tready, slv_resp_tready}), + .o_tdata (o_ctrl_tdata ), + .o_tlast (o_ctrl_tlast ), + .o_tvalid(o_ctrl_tvalid), + .o_tready(o_ctrl_tready) + ); + + end else if (AXIS_CTRL_MST_EN == 1'b1) begin : gen_mst_muxing + + assign mst_resp_tdata = i_ctrl_tdata; + assign mst_resp_tlast = i_ctrl_tlast; + assign mst_resp_tvalid = i_ctrl_tvalid; + assign i_ctrl_tready = mst_resp_tready; + + assign o_ctrl_tdata = 
mst_req_tdata; + assign o_ctrl_tlast = mst_req_tlast; + assign o_ctrl_tvalid = mst_req_tvalid; + assign mst_req_tready = o_ctrl_tready; + + end else begin : gen_no_mst_muxing + + assign slv_req_tdata = i_ctrl_tdata; + assign slv_req_tlast = i_ctrl_tlast; + assign slv_req_tvalid = i_ctrl_tvalid; + assign i_ctrl_tready = slv_req_tready; + + assign o_ctrl_tdata = slv_resp_tdata; + assign o_ctrl_tlast = slv_resp_tlast; + assign o_ctrl_tvalid = slv_resp_tvalid; + assign slv_resp_tready = o_ctrl_tready; + + end + endgenerate + + // --------------------------------------------------- + // AXIS Control Master and Slave + // --------------------------------------------------- + + generate + if (AXIS_CTRL_MST_EN == 1'b1) begin : gen_ctrl_master + axis_ctrl_master #( .THIS_PORTID(THIS_PORTID) ) axis_ctrl_mst_i ( + .clk (ctrlport_clk), + .rst (ctrlport_rst), + .s_axis_ctrl_tdata (mst_resp_tdata), + .s_axis_ctrl_tlast (mst_resp_tlast), + .s_axis_ctrl_tvalid (mst_resp_tvalid), + .s_axis_ctrl_tready (mst_resp_tready), + .m_axis_ctrl_tdata (mst_req_tdata), + .m_axis_ctrl_tlast (mst_req_tlast), + .m_axis_ctrl_tvalid (mst_req_tvalid), + .m_axis_ctrl_tready (mst_req_tready), + .ctrlport_req_wr (s_ctrlport_req_wr), + .ctrlport_req_rd (s_ctrlport_req_rd), + .ctrlport_req_addr (s_ctrlport_req_addr), + .ctrlport_req_portid (s_ctrlport_req_portid), + .ctrlport_req_rem_epid (s_ctrlport_req_rem_epid), + .ctrlport_req_rem_portid(s_ctrlport_req_rem_portid), + .ctrlport_req_data (s_ctrlport_req_data), + .ctrlport_req_byte_en (s_ctrlport_req_byte_en), + .ctrlport_req_has_time (s_ctrlport_req_has_time), + .ctrlport_req_time (s_ctrlport_req_time), + .ctrlport_resp_ack (s_ctrlport_resp_ack), + .ctrlport_resp_status (s_ctrlport_resp_status), + .ctrlport_resp_data (s_ctrlport_resp_data) + ); + end else begin : gen_no_ctrl_master + assign mst_resp_tready = 1'b1; + assign mst_req_tlast = 1'b0; + assign mst_req_tvalid = 1'b0; + assign s_ctrlport_resp_ack = 1'b0; + // Master disabled: tie off the 
remaining master-side signals so that + // no module output or internal stream is left undriven + assign mst_req_tdata = 32'h0; + assign s_ctrlport_resp_status = 2'b00; + assign s_ctrlport_resp_data = 32'h0; + end + + if (AXIS_CTRL_SLV_EN == 1'b1) 
begin : gen_ctrl_slave + axi_fifo #(.WIDTH(32+1), .SIZE(SLAVE_FIFO_SIZE)) slv_fifo_i ( + .clk(ctrlport_clk), .reset(ctrlport_rst), .clear(1'b0), + .i_tdata({slv_req_tlast, slv_req_tdata}), + .i_tvalid(slv_req_tvalid), .i_tready(slv_req_tready), + .o_tdata({slv_req_fifo_tlast, slv_req_fifo_tdata}), + .o_tvalid(slv_req_fifo_tvalid), .o_tready(slv_req_fifo_tready), + .space(), .occupied() + ); + + axis_ctrl_slave axis_ctrl_slv_i ( + .clk (ctrlport_clk), + .rst (ctrlport_rst), + .s_axis_ctrl_tdata (slv_req_fifo_tdata), + .s_axis_ctrl_tlast (slv_req_fifo_tlast), + .s_axis_ctrl_tvalid (slv_req_fifo_tvalid), + .s_axis_ctrl_tready (slv_req_fifo_tready), + .m_axis_ctrl_tdata (slv_resp_tdata), + .m_axis_ctrl_tlast (slv_resp_tlast), + .m_axis_ctrl_tvalid (slv_resp_tvalid), + .m_axis_ctrl_tready (slv_resp_tready), + .ctrlport_req_wr (m_ctrlport_req_wr), + .ctrlport_req_rd (m_ctrlport_req_rd), + .ctrlport_req_addr (m_ctrlport_req_addr), + .ctrlport_req_data (m_ctrlport_req_data), + .ctrlport_req_byte_en (m_ctrlport_req_byte_en), + .ctrlport_req_has_time(m_ctrlport_req_has_time), + .ctrlport_req_time (m_ctrlport_req_time), + .ctrlport_resp_ack (m_ctrlport_resp_ack), + .ctrlport_resp_status (m_ctrlport_resp_status), + .ctrlport_resp_data (m_ctrlport_resp_data) + ); + end else begin : gen_no_ctrl_slave + assign slv_req_fifo_tready = 1'b1; + assign slv_resp_tlast = 1'b0; + assign slv_resp_tvalid = 1'b0; + assign m_ctrlport_req_wr = 1'b0; + assign m_ctrlport_req_rd = 1'b0; + // Slave disabled: accept (and drop) any request words so the input path + // never sees an undriven tready, and tie off the remaining slave-side + // signals so that no module output is left undriven + assign slv_req_tready = 1'b1; + assign slv_resp_tdata = 32'h0; + assign m_ctrlport_req_addr = 20'h0; + assign m_ctrlport_req_data = 32'h0; + assign m_ctrlport_req_byte_en = 4'h0; + assign m_ctrlport_req_has_time = 1'b0; + assign m_ctrlport_req_time = 64'h0; + end + endgenerate + +endmodule // ctrlport_endpoint + diff 
--git a/fpga/usrp3/lib/rfnoc/core/rfnoc_axis_ctrl_utils.vh b/fpga/usrp3/lib/rfnoc/core/rfnoc_axis_ctrl_utils.vh new file mode 100644 index 000000000..5c3dab8ac --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/rfnoc_axis_ctrl_utils.vh @@ -0,0 +1,154 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// ============================================================= +// AXIS-Ctrl 
Bitfields +// ============================================================= + +// ----------------------- +// Line 0: HDR_0 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 31 is_ack Is this an acknowledgment to a transaction? +// 30 has_time Does the transaction have a timestamp? +// 29:24 seq_num Sequence number +// 23:20 num_data Number of data words +// 19:10 src_port Ctrl XB port that the source block is on +// 9:0 dst_port Ctrl XB port that the destination block is on + +// ----------------------- +// Line 1: HDR_1 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 31:26 <Reserved> +// 25:16 rem_dst_port Ctrl XB port that the remote dest block is on +// 15:0 rem_dst_epid Endpoint ID of the remote dest of this msg + +// ----------------------- +// Line 2: TS_LO (Optional) +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 31:0 timestamp Lower 32 bits of the timestamp + +// ----------------------- +// Line 3: TS_HI (Optional) +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 31:0 timestamp Upper 32 bits of the timestamp + +// ----------------------- +// Line 4: OP Word +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 31:30 status The status of the ack +// 29:28 <Reserved> +// 27:24 opcode Operation Code +// 23:20 byte_en Byte enable strobe +// 19:0 address Address for transaction + +// AXIS-Ctrl Status +// +localparam [1:0] AXIS_CTRL_STS_OKAY = 2'b00; +localparam [1:0] AXIS_CTRL_STS_CMDERR = 2'b01; +localparam [1:0] AXIS_CTRL_STS_TSERR = 2'b10; +localparam [1:0] AXIS_CTRL_STS_WARNING = 2'b11; + +// AXIS-Ctrl Opcode Definitions +// +localparam [3:0] AXIS_CTRL_OPCODE_SLEEP = 4'd0; +localparam [3:0] AXIS_CTRL_OPCODE_WRITE = 4'd1; +localparam [3:0] AXIS_CTRL_OPCODE_READ = 4'd2; +localparam [3:0] AXIS_CTRL_OPCODE_WRITE_READ = 4'd3; + +// AXIS-Ctrl Getter Functions +// +function [0:0] axis_ctrl_get_is_ack(input [31:0] header); + 
axis_ctrl_get_is_ack = header[31]; +endfunction + +function [0:0] axis_ctrl_get_has_time(input [31:0] header); + axis_ctrl_get_has_time = header[30]; +endfunction + +function [5:0] axis_ctrl_get_seq_num(input [31:0] header); + axis_ctrl_get_seq_num = header[29:24]; +endfunction + +function [3:0] axis_ctrl_get_num_data(input [31:0] header); + axis_ctrl_get_num_data = header[23:20]; +endfunction + +function [9:0] axis_ctrl_get_src_port(input [31:0] header); + axis_ctrl_get_src_port = header[19:10]; +endfunction + +function [9:0] axis_ctrl_get_dst_port(input [31:0] header); + axis_ctrl_get_dst_port = header[9:0]; +endfunction + +function [15:0] axis_ctrl_get_rem_dst_epid(input [31:0] header); + axis_ctrl_get_rem_dst_epid = header[15:0]; +endfunction + +function [9:0] axis_ctrl_get_rem_dst_port(input [31:0] header); + axis_ctrl_get_rem_dst_port = header[25:16]; +endfunction + +function [1:0] axis_ctrl_get_status(input [31:0] header); + axis_ctrl_get_status = header[31:30]; +endfunction + +function [3:0] axis_ctrl_get_opcode(input [31:0] header); + axis_ctrl_get_opcode = header[27:24]; +endfunction + +function [3:0] axis_ctrl_get_byte_en(input [31:0] header); + axis_ctrl_get_byte_en = header[23:20]; +endfunction + +function [19:0] axis_ctrl_get_address(input [31:0] header); + axis_ctrl_get_address = header[19:0]; +endfunction + +// AXIS-Ctrl Setter Functions +// +function [31:0] axis_ctrl_build_hdr_lo( + input [0:0] is_ack, + input [0:0] has_time, + input [5:0] seq_num, + input [3:0] num_data, + input [9:0] src_port, + input [9:0] dst_port +); + axis_ctrl_build_hdr_lo = {is_ack, has_time, seq_num, num_data, src_port, dst_port}; +endfunction + +function [31:0] axis_ctrl_build_hdr_hi( + input [9:0] rem_dst_port, + input [15:0] rem_dst_epid +); + axis_ctrl_build_hdr_hi = {6'h0, rem_dst_port, rem_dst_epid}; +endfunction + +function [31:0] chdr_ctrl_build_hdr_hi( + input [15:0] src_epid +); + chdr_ctrl_build_hdr_hi = {16'h0, src_epid}; +endfunction + +function [31:0] 
axis_ctrl_build_op_word( + input [1:0] status, + input [3:0] opcode, + input [3:0] byte_en, + input [19:0] address +); + axis_ctrl_build_op_word = {status, 2'b00, opcode, byte_en, address}; +endfunction diff --git a/fpga/usrp3/lib/rfnoc/core/rfnoc_backend_iface.vh b/fpga/usrp3/lib/rfnoc/core/rfnoc_backend_iface.vh new file mode 100644 index 000000000..ec5c152f6 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/rfnoc_backend_iface.vh @@ -0,0 +1,52 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// Each block has a backend interface that is 512 bits wide. This bus +// is split into 16 32-bit registers, so it is preferable to have fields +// aligned at 32-bit boundaries. + +// Backend Config +localparam BEC_FLUSH_TIMEOUT_OFFSET = 0; +localparam BEC_FLUSH_TIMEOUT_WIDTH = 32; +localparam BEC_FLUSH_EN_OFFSET = BEC_FLUSH_TIMEOUT_OFFSET + BEC_FLUSH_TIMEOUT_WIDTH; +localparam BEC_FLUSH_EN_WIDTH = 1; +localparam BEC_SOFT_CTRL_RST_OFFSET = BEC_FLUSH_EN_OFFSET + BEC_FLUSH_EN_WIDTH; +localparam BEC_SOFT_CTRL_RST_WIDTH = 1; +localparam BEC_SOFT_CHDR_RST_OFFSET = BEC_SOFT_CTRL_RST_OFFSET + BEC_SOFT_CTRL_RST_WIDTH; +localparam BEC_SOFT_CHDR_RST_WIDTH = 1; +localparam BEC_TOTAL_WIDTH = BEC_SOFT_CHDR_RST_OFFSET + BEC_SOFT_CHDR_RST_WIDTH; + +localparam [511:0] BEC_DEFAULT_VAL = { + {(512-BEC_TOTAL_WIDTH){1'b0}}, + 1'b1, // BEC_SOFT_CHDR_RST + 1'b1, // BEC_SOFT_CTRL_RST + 1'b0, // BEC_FLUSH_EN + 32'd0 // BEC_FLUSH_TIMEOUT +}; + +// Backend Status +localparam BES_PROTO_VER_OFFSET = 0; +localparam BES_PROTO_VER_WIDTH = 6; +localparam BES_NUM_DATA_I_OFFSET = BES_PROTO_VER_OFFSET + BES_PROTO_VER_WIDTH; +localparam BES_NUM_DATA_I_WIDTH = 6; +localparam BES_NUM_DATA_O_OFFSET = BES_NUM_DATA_I_OFFSET + BES_NUM_DATA_I_WIDTH; +localparam BES_NUM_DATA_O_WIDTH = 6; +localparam BES_CTRL_FIFOSIZE_OFFSET = BES_NUM_DATA_O_OFFSET + BES_NUM_DATA_O_WIDTH; +localparam BES_CTRL_FIFOSIZE_WIDTH = 6; +localparam 
BES_CTRL_MAX_ASYNC_MSGS_OFFSET = BES_CTRL_FIFOSIZE_OFFSET + BES_CTRL_FIFOSIZE_WIDTH; +localparam BES_CTRL_MAX_ASYNC_MSGS_WIDTH = 8; +localparam BES_NOC_ID_OFFSET = BES_CTRL_MAX_ASYNC_MSGS_OFFSET + BES_CTRL_MAX_ASYNC_MSGS_WIDTH; +localparam BES_NOC_ID_WIDTH = 32; +localparam BES_FLUSH_ACTIVE_OFFSET = BES_NOC_ID_OFFSET + BES_NOC_ID_WIDTH; +localparam BES_FLUSH_ACTIVE_WIDTH = 1; +localparam BES_FLUSH_DONE_OFFSET = BES_FLUSH_ACTIVE_OFFSET + BES_FLUSH_ACTIVE_WIDTH; +localparam BES_FLUSH_DONE_WIDTH = 1; +localparam BES_DATA_MTU_OFFSET = BES_FLUSH_DONE_OFFSET + BES_FLUSH_DONE_WIDTH; +localparam BES_DATA_MTU_WIDTH = 6; +localparam BES_TOTAL_WIDTH = BES_DATA_MTU_OFFSET + BES_DATA_MTU_WIDTH; + +// Protocol version for this definition +localparam [5:0] BACKEND_PROTO_VER = 6'd1;
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_internal_utils.vh b/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_internal_utils.vh new file mode 100644 index 000000000..1d70c0f1c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_internal_utils.vh @@ -0,0 +1,452 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// ============================================================= +// Stream Status Bitfields +// ============================================================= + +// ----------------------- +// Line 0 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:24 capacity_bytes Downstream buffer capacity in bytes +// 23:20 <reserved> +// 19:16 status Stream status code (enumeration) +// 15:0 src_epid Endpoint ID of the source of this msg + +// ----------------------- +// Line 1 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:24 xfercnt_pkts Transfer count in packets +// 23:0 capacity_pkts Downstream buffer capacity in packets + +// ----------------------- +// Line 2 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:0 xfercnt_bytes Transfer count in bytes + +// ----------------------- +// Line 3 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:16 status_info Extended information about status (diagnostic only) +// 15:0 buff_info Extended information about buffer state (diagnostic only) + +localparam [3:0] CHDR_STRS_STATUS_OKAY = 4'd0; // No error +localparam [3:0] CHDR_STRS_STATUS_CMDERR = 4'd1; // Cmd execution failed +localparam [3:0] CHDR_STRS_STATUS_SEQERR = 4'd2; // Sequence number discontinuity +localparam [3:0] CHDR_STRS_STATUS_DATAERR = 4'd3; // Data integrity check failed +localparam [3:0] CHDR_STRS_STATUS_RTERR = 4'd4; // Unexpected destination + +// 64-bit fields +function [39:0] chdr64_strs_get_capacity_bytes(input [63:0] header); + 
chdr64_strs_get_capacity_bytes = header[63:24]; +endfunction + +function [3:0] chdr64_strs_get_status(input [63:0] header); + chdr64_strs_get_status = header[19:16]; +endfunction + +function [15:0] chdr64_strs_get_src_epid(input [63:0] header); + chdr64_strs_get_src_epid = header[15:0]; +endfunction + +function [39:0] chdr64_strs_get_xfercnt_pkts(input [63:0] header); + chdr64_strs_get_xfercnt_pkts = header[63:24]; +endfunction + +function [23:0] chdr64_strs_get_capacity_pkts(input [63:0] header); + chdr64_strs_get_capacity_pkts = header[23:0]; +endfunction + +function [63:0] chdr64_strs_get_xfercnt_bytes(input [63:0] header); + chdr64_strs_get_xfercnt_bytes = header[63:0]; +endfunction + +function [47:0] chdr64_strs_get_status_info(input [63:0] header); + chdr64_strs_get_status_info = header[63:16]; +endfunction + +function [15:0] chdr64_strs_get_buff_info(input [63:0] header); + chdr64_strs_get_buff_info = header[15:0]; +endfunction + + +// 128-bit fields +function [39:0] chdr128_strs_get_capacity_bytes(input [127:0] header); + chdr128_strs_get_capacity_bytes = chdr64_strs_get_capacity_bytes(header[63:0]); +endfunction + +function [3:0] chdr128_strs_get_status(input [127:0] header); + chdr128_strs_get_status = chdr64_strs_get_status(header[63:0]); +endfunction + +function [15:0] chdr128_strs_get_src_epid(input [127:0] header); + chdr128_strs_get_src_epid = chdr64_strs_get_src_epid(header[63:0]); +endfunction + +function [23:0] chdr128_strs_get_capacity_pkts(input [127:0] header); + chdr128_strs_get_capacity_pkts = chdr64_strs_get_capacity_pkts(header[127:64]); +endfunction + +function [39:0] chdr128_strs_get_xfercnt_pkts(input [127:0] header); + chdr128_strs_get_xfercnt_pkts = chdr64_strs_get_xfercnt_pkts(header[127:64]); +endfunction + +function [63:0] chdr128_strs_get_xfercnt_bytes(input [127:0] header); + chdr128_strs_get_xfercnt_bytes = chdr64_strs_get_xfercnt_bytes(header[63:0]); +endfunction + +function [47:0] chdr128_strs_get_status_info(input [127:0] 
header); + chdr128_strs_get_status_info = chdr64_strs_get_status_info(header[127:64]); +endfunction + +function [15:0] chdr128_strs_get_buff_info(input [127:0] header); + chdr128_strs_get_buff_info = chdr64_strs_get_buff_info(header[127:64]); +endfunction + + +// 256-bit fields +function [39:0] chdr256_strs_get_capacity_bytes(input [255:0] header); + chdr256_strs_get_capacity_bytes = chdr64_strs_get_capacity_bytes(header[63:0]); +endfunction + +function [3:0] chdr256_strs_get_status(input [255:0] header); + chdr256_strs_get_status = chdr64_strs_get_status(header[63:0]); +endfunction + +function [15:0] chdr256_strs_get_src_epid(input [255:0] header); + chdr256_strs_get_src_epid = chdr64_strs_get_src_epid(header[63:0]); +endfunction + +function [23:0] chdr256_strs_get_capacity_pkts(input [255:0] header); + chdr256_strs_get_capacity_pkts = chdr64_strs_get_capacity_pkts(header[127:64]); +endfunction + +function [39:0] chdr256_strs_get_xfercnt_pkts(input [255:0] header); + chdr256_strs_get_xfercnt_pkts = chdr64_strs_get_xfercnt_pkts(header[127:64]); +endfunction + +function [63:0] chdr256_strs_get_xfercnt_bytes(input [255:0] header); + chdr256_strs_get_xfercnt_bytes = chdr64_strs_get_xfercnt_bytes(header[191:128]); +endfunction + +function [47:0] chdr256_strs_get_status_info(input [255:0] header); + chdr256_strs_get_status_info = chdr64_strs_get_status_info(header[255:192]); +endfunction + +function [15:0] chdr256_strs_get_buff_info(input [255:0] header); + chdr256_strs_get_buff_info = chdr64_strs_get_buff_info(header[255:192]); +endfunction + +// Stream Status Setter Functions +// + +// 64-bit fields +function [63:0] chdr64_strs_build_w0( + input [39:0] capacity_bytes, + input [3:0] status, + input [15:0] src_epid +); + chdr64_strs_build_w0 = {capacity_bytes, 4'h0, status, src_epid}; +endfunction + +function [63:0] chdr64_strs_build_w1( + input [39:0] xfercnt_pkts, + input [23:0] capacity_pkts +); + chdr64_strs_build_w1 = {xfercnt_pkts, capacity_pkts}; +endfunction + 
+function [63:0] chdr64_strs_build_w2( + input [63:0] xfercnt_bytes +); + chdr64_strs_build_w2 = xfercnt_bytes; +endfunction + +function [63:0] chdr64_strs_build_w3( + input [47:0] status_info, + input [15:0] buff_info +); + chdr64_strs_build_w3 = {status_info, buff_info}; +endfunction + +// 128-bit fields +function [127:0] chdr128_strs_build_w0( + input [39:0] xfercnt_pkts, + input [23:0] capacity_pkts, + input [39:0] capacity_bytes, + input [3:0] status, + input [15:0] src_epid +); + chdr128_strs_build_w0 = { + chdr64_strs_build_w1(xfercnt_pkts, capacity_pkts), + chdr64_strs_build_w0(capacity_bytes, status, src_epid)}; +endfunction + +function [127:0] chdr128_strs_build_w1( + input [47:0] status_info, + input [15:0] buff_info, + input [63:0] xfercnt_bytes +); + chdr128_strs_build_w1 = { + chdr64_strs_build_w3(status_info, buff_info), + chdr64_strs_build_w2(xfercnt_bytes)}; +endfunction + +// 256-bit fields +function [255:0] chdr256_strs_build( + input [47:0] status_info, + input [15:0] buff_info, + input [63:0] xfercnt_bytes, + input [39:0] xfercnt_pkts, + input [23:0] capacity_pkts, + input [39:0] capacity_bytes, + input [3:0] status, + input [15:0] src_epid +); + chdr256_strs_build = { + chdr64_strs_build_w3(status_info, buff_info), + chdr64_strs_build_w2(xfercnt_bytes), + chdr64_strs_build_w1(xfercnt_pkts, capacity_pkts), + chdr64_strs_build_w0(capacity_bytes, status, src_epid)}; +endfunction + +// ============================================================= +// Stream Command Bitfields +// ============================================================= + +// ----------------------- +// Line 0 +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:24 num_pkts Number of packets +// 23:20 op_data Payload for command +// 19:16 op_code Command operation code (enumeration) +// 15:0 src_epid Endpoint ID of the source of this msg + +// ----------------------- +// Line 1 +// ----------------------- +// Bits Name Meaning +// 
---- ---- ------- +// 63:0 num_bytes Number of bytes + +localparam [3:0] CHDR_STRC_OPCODE_INIT = 4'd0; +localparam [3:0] CHDR_STRC_OPCODE_PING = 4'd1; +localparam [3:0] CHDR_STRC_OPCODE_RESYNC = 4'd2; + +// 64-bit fields +function [39:0] chdr64_strc_get_num_pkts(input [63:0] header); + chdr64_strc_get_num_pkts = header[63:24]; +endfunction + +function [3:0] chdr64_strc_get_op_data(input [63:0] header); + chdr64_strc_get_op_data = header[23:20]; +endfunction + +function [3:0] chdr64_strc_get_op_code(input [63:0] header); + chdr64_strc_get_op_code = header[19:16]; +endfunction + +function [15:0] chdr64_strc_get_src_epid(input [63:0] header); + chdr64_strc_get_src_epid = header[15:0]; +endfunction + +function [63:0] chdr64_strc_get_num_bytes(input [63:0] header); + chdr64_strc_get_num_bytes = header[63:0]; +endfunction + +// 128-bit fields +function [39:0] chdr128_strc_get_num_pkts(input [127:0] header); + chdr128_strc_get_num_pkts = chdr64_strc_get_num_pkts(header[63:0]); +endfunction + +function [3:0] chdr128_strc_get_op_data(input [127:0] header); + chdr128_strc_get_op_data = chdr64_strc_get_op_data(header[63:0]); +endfunction + +function [3:0] chdr128_strc_get_op_code(input [127:0] header); + chdr128_strc_get_op_code = chdr64_strc_get_op_code(header[63:0]); +endfunction + +function [15:0] chdr128_strc_get_src_epid(input [127:0] header); + chdr128_strc_get_src_epid = chdr64_strc_get_src_epid(header[63:0]); +endfunction + +function [63:0] chdr128_strc_get_num_bytes(input [127:0] header); + chdr128_strc_get_num_bytes = chdr64_strc_get_num_bytes(header[127:64]); +endfunction + +// Stream Command Setter Functions +// + +// 64-bit fields + +function [63:0] chdr64_strc_build_w0( + input [39:0] num_pkts, + input [3:0] op_data, + input [3:0] op_code, + input [15:0] src_epid +); + chdr64_strc_build_w0 = {num_pkts, op_data, op_code, src_epid}; +endfunction + +function [63:0] chdr64_strc_build_w1( + input [63:0] num_bytes +); + chdr64_strc_build_w1 = num_bytes; 
+endfunction + +// 128-bit fields +function [127:0] chdr128_strc_build( + input [63:0] num_bytes, + input [39:0] num_pkts, + input [3:0] op_data, + input [3:0] op_code, + input [15:0] src_epid +); + chdr128_strc_build = { + chdr64_strc_build_w1(num_bytes), + chdr64_strc_build_w0(num_pkts, op_data, op_code, src_epid)}; +endfunction + +// ============================================================= +// Management Packet Bitfields +// ============================================================= + +// ----------------------- +// HDR +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:48 proto_ver Protocol Version +// 47:45 chdr_w Bitwidth of the CHDR interface +// 44:26 <Reserved> +// 25:16 num_hops Number of hops that this message will take (TTL) +// 15:0 src_epid Endpoint ID of the source of this msg + +// ----------------------- +// OP +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:16 op_payload Operation Payload +// 15:8 op_code Operation code +// 7:0 ops_pending Number of operations pending in this hop + +localparam [2:0] CHDR_MGMT_WIDTH_64 = 3'd0; +localparam [2:0] CHDR_MGMT_WIDTH_128 = 3'd1; +localparam [2:0] CHDR_MGMT_WIDTH_256 = 3'd2; +localparam [2:0] CHDR_MGMT_WIDTH_512 = 3'd3; + +function [2:0] chdr_w_to_enum(input integer bits); + if (bits == 512) + chdr_w_to_enum = CHDR_MGMT_WIDTH_512; + else if (bits == 256) + chdr_w_to_enum = CHDR_MGMT_WIDTH_256; + else if (bits == 128) + chdr_w_to_enum = CHDR_MGMT_WIDTH_128; + else + chdr_w_to_enum = CHDR_MGMT_WIDTH_64; +endfunction + +localparam [7:0] CHDR_MGMT_OP_NOP = 8'd0; +localparam [7:0] CHDR_MGMT_OP_ADVERTISE = 8'd1; +localparam [7:0] CHDR_MGMT_OP_SEL_DEST = 8'd2; +localparam [7:0] CHDR_MGMT_OP_RETURN = 8'd3; +localparam [7:0] CHDR_MGMT_OP_INFO_REQ = 8'd4; +localparam [7:0] CHDR_MGMT_OP_INFO_RESP = 8'd5; +localparam [7:0] CHDR_MGMT_OP_CFG_WR_REQ = 8'd6; +localparam [7:0] CHDR_MGMT_OP_CFG_RD_REQ = 8'd7; +localparam [7:0] CHDR_MGMT_OP_CFG_RD_RESP = 
8'd8; + +function [15:0] chdr_mgmt_get_proto_ver(input [63:0] header); + chdr_mgmt_get_proto_ver = header[63:48]; +endfunction + +function [2:0] chdr_mgmt_get_chdr_w(input [63:0] header); + chdr_mgmt_get_chdr_w = header[47:45]; +endfunction + +function [9:0] chdr_mgmt_get_num_hops(input [63:0] header); + chdr_mgmt_get_num_hops = header[25:16]; +endfunction + +function [15:0] chdr_mgmt_get_src_epid(input [63:0] header); + chdr_mgmt_get_src_epid = header[15:0]; +endfunction + +function [47:0] chdr_mgmt_get_op_payload(input [63:0] header); + chdr_mgmt_get_op_payload = header[63:16]; +endfunction + +function [7:0] chdr_mgmt_get_op_code(input [63:0] header); + chdr_mgmt_get_op_code = header[15:8]; +endfunction + +function [7:0] chdr_mgmt_get_ops_pending(input [63:0] header); + chdr_mgmt_get_ops_pending = header[7:0]; +endfunction + +function [63:0] chdr_mgmt_build_hdr( + input [15:0] proto_ver, + input [2:0] chdr_w, + input [9:0] num_hops, + input [15:0] src_epid +); + chdr_mgmt_build_hdr = {proto_ver, chdr_w, 19'h0, num_hops, src_epid}; +endfunction + +function [63:0] chdr_mgmt_build_op( + input [47:0] op_payload, + input [7:0] op_code, + input [7:0] ops_pending +); + chdr_mgmt_build_op = {op_payload, op_code, ops_pending}; +endfunction + +// Definition for the TID field for the output of chdr_mgmt_pkt_handler +localparam [1:0] CHDR_MGMT_ROUTE_EPID = 2'd0; // Route based on EPID +localparam [1:0] CHDR_MGMT_ROUTE_TDEST = 2'd1; // Route based on tdest field +localparam [1:0] CHDR_MGMT_RETURN_TO_SRC = 2'd2; // Return packet to sender + +// ----------------------- +// OP specific fields +// ----------------------- + +localparam [3:0] NODE_TYPE_INVALID = 4'd0; +localparam [3:0] NODE_TYPE_XBAR = 4'd1; +localparam [3:0] NODE_TYPE_STREAM_EP = 4'd2; +localparam [3:0] NODE_TYPE_TRANSPORT = 4'd3; + +function [47:0] chdr_mgmt_build_node_info( + input [17:0] ext_info, + input [9:0] node_inst, + input [3:0] node_type, + input [15:0] device_id +); + chdr_mgmt_build_node_info = 
{ext_info, node_inst, node_type, device_id}; +endfunction + +function [9:0] chdr_mgmt_sel_dest_get_tdest(input [47:0] payload); + chdr_mgmt_sel_dest_get_tdest = payload[9:0]; +endfunction + +function [15:0] chdr_mgmt_cfg_reg_get_addr(input [47:0] payload); + chdr_mgmt_cfg_reg_get_addr = payload[15:0]; +endfunction + +function [31:0] chdr_mgmt_cfg_reg_get_data(input [47:0] payload); + chdr_mgmt_cfg_reg_get_data = payload[47:16]; +endfunction diff --git a/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_utils.vh b/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_utils.vh new file mode 100644 index 000000000..047d58bc0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/rfnoc_chdr_utils.vh @@ -0,0 +1,200 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// ============================================================= +// CHDR Bitfields +// ============================================================= +// +// The Condensed Hierarchical Datagram for RFNoC (CHDR) is +// a protocol that defines the fundamental unit of data transfer +// in an RFNoC network. 
+// +// ----------------------- +// Header +// ----------------------- +// Bits Name Meaning +// ---- ---- ------- +// 63:58 vc Virtual Channel +// 57 eob End of Burst Delimiter +// 56 eov End of Vector Delimiter +// 55:53 pkt_type Packet Type (enumeration) +// 52:48 num_mdata Number of lines of metadata +// 47:32 seq_num Sequence number for the packet +// 31:16 length Length of the datagram in bytes +// 15:0 dst_epid Destination Endpoint ID +// +// Field: Packet Type +// ----------------------- +// 3'd0 Management +// 3'd1 Stream Status +// 3'd2 Stream Command +// 3'd3 <Reserved> +// 3'd4 Control Transaction +// 3'd5 <Reserved> +// 3'd6 Data (without timestamp) +// 3'd7 Data (with timestamp) +// + +// Special CHDR Values +// + +// Packet Type +localparam [2:0] CHDR_PKT_TYPE_MGMT = 3'd0; +localparam [2:0] CHDR_PKT_TYPE_STRS = 3'd1; +localparam [2:0] CHDR_PKT_TYPE_STRC = 3'd2; +//localparam [2:0] RESERVED = 3'd3; +localparam [2:0] CHDR_PKT_TYPE_CTRL = 3'd4; +//localparam [2:0] RESERVED = 3'd5; +localparam [2:0] CHDR_PKT_TYPE_DATA = 3'd6; +localparam [2:0] CHDR_PKT_TYPE_DATA_TS = 3'd7; + +// Metadata +localparam [4:0] CHDR_NO_MDATA = 5'd0; + +// EPID +localparam [15:0] NULL_EPID = 16'd0; + +// CHDR Getter Functions +// +function [5:0] chdr_get_vc(input [63:0] header); + chdr_get_vc = header[63:58]; +endfunction + +function [0:0] chdr_get_eob(input [63:0] header); + chdr_get_eob = header[57]; +endfunction + +function [0:0] chdr_get_eov(input [63:0] header); + chdr_get_eov = header[56]; +endfunction + +function [2:0] chdr_get_pkt_type(input [63:0] header); + chdr_get_pkt_type = header[55:53]; +endfunction + +function [4:0] chdr_get_num_mdata(input [63:0] header); + chdr_get_num_mdata = header[52:48]; +endfunction + +function [15:0] chdr_get_seq_num(input [63:0] header); + chdr_get_seq_num = header[47:32]; +endfunction + +function [15:0] chdr_get_length(input [63:0] header); + chdr_get_length = header[31:16]; +endfunction + +function [15:0] chdr_get_dst_epid(input 
[63:0] header); + chdr_get_dst_epid = header[15:0]; +endfunction + +// CHDR Setter Functions +// +function [63:0] chdr_build_header( + input [5:0] vc, + input [0:0] eob, + input [0:0] eov, + input [2:0] pkt_type, + input [4:0] num_mdata, + input [15:0] seq_num, + input [15:0] length, + input [15:0] dst_epid +); + chdr_build_header = {vc, eob, eov, pkt_type, num_mdata, seq_num, length, dst_epid}; +endfunction + +function [63:0] chdr_set_vc( + input [63:0] base_hdr, + input [5:0] vc +); + chdr_set_vc = {vc, base_hdr[57:0]}; +endfunction + +function [63:0] chdr_set_eob( + input [63:0] base_hdr, + input [0:0] eob +); + chdr_set_eob = {base_hdr[63:58], eob, base_hdr[56:0]}; +endfunction + +function [63:0] chdr_set_eov( + input [63:0] base_hdr, + input [0:0] eov +); + chdr_set_eov = {base_hdr[63:57], eov, base_hdr[55:0]}; +endfunction + +function [63:0] chdr_set_delims( + input [63:0] base_hdr, + input [0:0] eob, + input [0:0] eov +); + chdr_set_delims = {base_hdr[63:58], eob, eov, base_hdr[55:0]}; +endfunction + +function [63:0] chdr_set_pkt_type( + input [63:0] base_hdr, + input [2:0] pkt_type +); + chdr_set_pkt_type = {base_hdr[63:56], pkt_type, base_hdr[52:0]}; +endfunction + +function [63:0] chdr_set_num_mdata( + input [63:0] base_hdr, + input [4:0] num_mdata +); + chdr_set_num_mdata = {base_hdr[63:53], num_mdata, base_hdr[47:0]}; +endfunction + +function [63:0] chdr_set_seq_num( + input [63:0] base_hdr, + input [15:0] seq_num +); + chdr_set_seq_num = {base_hdr[63:48], seq_num, base_hdr[31:0]}; +endfunction + +function [63:0] chdr_set_length( + input [63:0] base_hdr, + input [15:0] length +); + chdr_set_length = {base_hdr[63:32], length, base_hdr[15:0]}; +endfunction + +function [63:0] chdr_set_dst_epid( + input [63:0] base_hdr, + input [15:0] dst_epid +); + chdr_set_dst_epid = {base_hdr[63:16], dst_epid}; +endfunction + +// ============================================================= +// Data Packet Specific +// 
============================================================= + +localparam [3:0] CONTEXT_FIELD_HDR = 4'd0; +localparam [3:0] CONTEXT_FIELD_HDR_TS = 4'd1; +localparam [3:0] CONTEXT_FIELD_TS = 4'd2; +localparam [3:0] CONTEXT_FIELD_MDATA = 4'd3; + +function [0:0] chdr_get_has_time(input [63:0] header); + chdr_get_has_time = (chdr_get_pkt_type(header) == CHDR_PKT_TYPE_DATA_TS); +endfunction + +// Calculate the payload length in bytes based on the CHDR_W and header +function [15:0] chdr_calc_payload_length(input [31:0] chdr_w, input [63:0] header); + reg [15:0] payload_length, mdata_length, header_length; + begin + if (chdr_w == 64) begin + header_length = chdr_get_has_time(header) ? 2*(chdr_w/8) : (chdr_w/8); + end else begin + header_length = chdr_w/8; + end + mdata_length = chdr_get_num_mdata(header) * (chdr_w/8); + payload_length = chdr_get_length(header) - mdata_length - header_length; + + chdr_calc_payload_length = payload_length; + end +endfunction diff --git a/fpga/usrp3/lib/rfnoc/core/rfnoc_core_kernel.v b/fpga/usrp3/lib/rfnoc/core/rfnoc_core_kernel.v new file mode 100644 index 000000000..15a7940a4 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/core/rfnoc_core_kernel.v @@ -0,0 +1,385 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: rfnoc_core_kernel +// Description: +// The main utility and software interface module for an +// assembled rfnoc design +// +// Parameters: +// - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>} +// - DEVICE_TYPE: The device type to use in the Device Info register +// - DEVICE_FAMILY: The device family (to pass to Xilinx primitives) +// - SAFE_START_CLKS: Instantiate logic to ensure that all output +// clocks are glitch-free and startup safely +// - NUM_BLOCKS: Number of blocks instantiated in the design +// - NUM_STREAM_ENDPOINTS: Number of stream EPs instantiated in the design +// - NUM_ENDPOINTS_CTRL: Number of stream EPs connected to 
the ctrl crossbar +// - NUM_TRANSPORTS: Number of transports instantiated in the design +// - NUM_EDGES: Number of edges of static connection in the design +// - CHDR_XBAR_PRESENT: 1 if the CHDR crossbar is present. If 0 then +// transports are directly connected to SEPs +// - EDGE_TBL_FILE: The memory init file for the static connection +// adjacency list +// +// Signals: +// - chdr_aclk : The input CHDR clock (may be unbuffered if SAFE_START_CLKS=1) +// - chdr_aclk_locked : The PLL locked pin for the input CHDR clock (unused if SAFE_START_CLKS=0) +// - ctrl_aclk : The input Control clock (may be unbuffered if SAFE_START_CLKS=1) +// - ctrl_aclk_locked : The PLL locked pin for the input Control clock (unused if SAFE_START_CLKS=0) +// - core_chdr_clk: Output stable CHDR clock for the rest of the design +// - core_chdr_rst: Output sync CHDR reset for all infrastructure modules (not blocks) +// - core_ctrl_clk: Output stable Control clock for the rest of the design +// - core_ctrl_rst: Output sync Control reset for all infrastructure modules (not blocks) +// - s_axis_ctrl_* : Slave AXIS-Ctrl for the primary (zero'th) control endpoint +// - m_axis_ctrl_* : Master AXIS-Ctrl for the primary (zero'th) control endpoint +// - device_id: The dynamic device_id to read through the Device Info register (domain: core_chdr_clk) +// - rfnoc_core_config: The backend config port for all blocks in the design (domain: core_ctrl_clk) +// - rfnoc_core_status: The backend status port for all blocks in the design (domain: core_ctrl_clk) + +module rfnoc_core_kernel #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter [15:0] DEVICE_TYPE = 16'd0, + parameter DEVICE_FAMILY = "7SERIES", + parameter SAFE_START_CLKS = 0, + parameter [9:0] NUM_BLOCKS = 0, + parameter [9:0] NUM_STREAM_ENDPOINTS = 0, + parameter [9:0] NUM_ENDPOINTS_CTRL = 0, + parameter [9:0] NUM_TRANSPORTS = 0, + parameter [11:0] NUM_EDGES = 0, + parameter [0:0] CHDR_XBAR_PRESENT = 1, + parameter EDGE_TBL_FILE = "" +)( + // 
Input clocks and resets + input wire chdr_aclk, + input wire chdr_aclk_locked, + input wire ctrl_aclk, + input wire ctrl_aclk_locked, + input wire core_arst, + // Output clocks and resets + output wire core_chdr_clk, + output wire core_chdr_rst, + output wire core_ctrl_clk, + output wire core_ctrl_rst, + // AXIS-Control Bus + input wire [31:0] s_axis_ctrl_tdata, + input wire s_axis_ctrl_tlast, + input wire s_axis_ctrl_tvalid, + output wire s_axis_ctrl_tready, + output wire [31:0] m_axis_ctrl_tdata, + output wire m_axis_ctrl_tlast, + output wire m_axis_ctrl_tvalid, + input wire m_axis_ctrl_tready, + // Global info (domain: core_chdr_clk) + input wire [15:0] device_id, + // Backend config/status for each block (domain: core_ctrl_clk) + output wire [(512*NUM_BLOCKS)-1:0] rfnoc_core_config, + input wire [(512*NUM_BLOCKS)-1:0] rfnoc_core_status +); + + `include "rfnoc_axis_ctrl_utils.vh" + `include "rfnoc_backend_iface.vh" + + // ----------------------------------- + // Clocking and Resets + // ----------------------------------- + + generate if (SAFE_START_CLKS == 1) begin + // Safe startup logic for the CHDR and Control clocks: + // chdr_aclk and ctrl_aclk can be unbuffered. + // Use a BUFGCE to disable the clock until the upstream + // PLLs have locked. + + wire chdr_ce_clk, ctrl_ce_clk; + (* keep = "true" *) (* async_reg = "true" *) reg [7:0] chdr_clk_ce_shreg = 8'h0; + (* keep = "true" *) (* async_reg = "true" *) reg [7:0] ctrl_clk_ce_shreg = 8'h0; + + // A glitch-free clock buffer with an enable + BUFGCE chdr_clk_buf_i ( + .I (chdr_aclk), + .CE(chdr_clk_ce_shreg[7]), + .O (core_chdr_clk) + ); + // A separate clock buffer for the CE signal + // We instantiate this manually to prevent the tools from instantiating + // the more scare BUFG here. 
There are a lot more BUFHs than BUFGs + BUFH chdr_ce_buf_i ( + .I(chdr_aclk), + .O(chdr_ce_clk) + ); + always @(posedge chdr_ce_clk) begin + chdr_clk_ce_shreg <= {chdr_clk_ce_shreg[6:0], chdr_aclk_locked}; + end + + // A glitch-free clock buffer with an enable + BUFGCE ctrl_clk_buf_i ( + .I (ctrl_aclk), + .CE(ctrl_clk_ce_shreg[7]), + .O (core_ctrl_clk) + ); + // A separate clock buffer for the CE signal + // We instantiate this manually to prevent the tools from instantiating + // the more scare BUFG here. There are a lot more BUFHs than BUFGs + BUFH ctrl_ce_buf_i ( + .I(ctrl_aclk), + .O(ctrl_ce_clk) + ); + always @(posedge ctrl_ce_clk) begin + ctrl_clk_ce_shreg <= {ctrl_clk_ce_shreg[6:0], ctrl_aclk_locked}; + end + end else begin + // We assume that chdr_aclk and ctrl_aclk start safely and are glitch-free + assign core_chdr_clk = chdr_aclk; + assign core_ctrl_clk = ctrl_aclk; + end endgenerate + + reset_sync rst_sync_chdr_i ( + .clk(core_chdr_clk), .reset_in(core_arst), .reset_out(core_chdr_rst) + ); + reset_sync rst_sync_ctrl_i ( + .clk(core_ctrl_clk), .reset_in(core_arst), .reset_out(core_ctrl_rst) + ); + + // ----------------------------------- + // AXIS-Ctrl Slave + // ----------------------------------- + + wire ctrlport_req_wr; + wire ctrlport_req_rd; + wire [19:0] ctrlport_req_addr; + wire [31:0] ctrlport_req_data; + reg ctrlport_resp_ack; + reg [31:0] ctrlport_resp_data; + + // The port ID of this endpoint must be zero + localparam [9:0] RFNOC_CORE_PORT_ID = 10'd0; + + ctrlport_endpoint #( + .THIS_PORTID(RFNOC_CORE_PORT_ID), .SYNC_CLKS(1), + .AXIS_CTRL_MST_EN(0), .AXIS_CTRL_SLV_EN(1), + .SLAVE_FIFO_SIZE(5) + ) ctrlport_ep_i ( + .rfnoc_ctrl_clk (core_ctrl_clk ), + .rfnoc_ctrl_rst (core_ctrl_rst ), + .ctrlport_clk (core_ctrl_clk ), + .ctrlport_rst (core_ctrl_rst ), + .s_rfnoc_ctrl_tdata (s_axis_ctrl_tdata ), + .s_rfnoc_ctrl_tlast (s_axis_ctrl_tlast ), + .s_rfnoc_ctrl_tvalid (s_axis_ctrl_tvalid ), + .s_rfnoc_ctrl_tready (s_axis_ctrl_tready ), + 
.m_rfnoc_ctrl_tdata (m_axis_ctrl_tdata ), + .m_rfnoc_ctrl_tlast (m_axis_ctrl_tlast ), + .m_rfnoc_ctrl_tvalid (m_axis_ctrl_tvalid ), + .m_rfnoc_ctrl_tready (m_axis_ctrl_tready ), + .m_ctrlport_req_wr (ctrlport_req_wr ), + .m_ctrlport_req_rd (ctrlport_req_rd ), + .m_ctrlport_req_addr (ctrlport_req_addr ), + .m_ctrlport_req_data (ctrlport_req_data ), + .m_ctrlport_req_byte_en (/* not supported */), + .m_ctrlport_req_has_time (/* not supported */), + .m_ctrlport_req_time (/* not supported */), + .m_ctrlport_resp_ack (ctrlport_resp_ack ), + .m_ctrlport_resp_status (AXIS_CTRL_STS_OKAY ), + .m_ctrlport_resp_data (ctrlport_resp_data ), + .s_ctrlport_req_wr (1'b0 ), + .s_ctrlport_req_rd (1'b0 ), + .s_ctrlport_req_addr (20'd0 ), + .s_ctrlport_req_portid (10'd0 ), + .s_ctrlport_req_rem_epid (16'd0 ), + .s_ctrlport_req_rem_portid(10'd0 ), + .s_ctrlport_req_data (32'h0 ), + .s_ctrlport_req_byte_en (4'h0 ), + .s_ctrlport_req_has_time (1'b0 ), + .s_ctrlport_req_time (1'b0 ), + .s_ctrlport_resp_ack (/* unused */ ), + .s_ctrlport_resp_status (/* unused */ ), + .s_ctrlport_resp_data (/* unused */ ) + ); + + // ------------------------------------------------ + // Segment Address space into the three functions: + // - Block Specific (incl. 
global regs) + // - Connections + // ------------------------------------------------ + + reg [15:0] req_addr = 16'h0; + reg [31:0] req_data = 32'h0; + reg blk_req_wr = 1'b0; + reg blk_req_rd = 1'b0; + reg blk_resp_ack = 1'b0; + reg [31:0] blk_resp_data = 32'h0; + reg con_req_wr = 1'b0; + reg con_req_rd = 1'b0; + reg con_resp_ack = 1'b0; + reg [31:0] con_resp_data = 32'h0; + + // Shortcuts + wire blk_addr_space = (ctrlport_req_addr[19:16] == 4'd0); + wire con_addr_space = (ctrlport_req_addr[19:16] == 4'd1); + + // ControlPort MUX + always @(posedge core_ctrl_clk) begin + // Write strobe + blk_req_wr <= ctrlport_req_wr & blk_addr_space; + con_req_wr <= ctrlport_req_wr & con_addr_space; + // Read strobe + blk_req_rd <= ctrlport_req_rd & blk_addr_space; + con_req_rd <= ctrlport_req_rd & con_addr_space; + // Address and Data (shared) + req_addr <= ctrlport_req_addr[15:0]; + req_data <= ctrlport_req_data; + // Response + ctrlport_resp_ack <= blk_resp_ack | con_resp_ack; + if (blk_resp_ack) + ctrlport_resp_data <= blk_resp_data; + else + ctrlport_resp_data <= con_resp_data; + end + + // ----------------------------------- + // Block Address Space + // ----------------------------------- + + // Arrange the backend block wires into a 2-d array where the + // outer index represents the slot number and the inner index represents + // a register index for that slot. We have 512 bits of read/write + // data which translates to 16 32-bit registers per slot. The first slot + // belongs to this endpoint, the next N slots map to the instantiated + // stream endpoints and the remaining slots map to block control and + // status endpoint. The slot number has a 1-to-1 mapping to the port + // number on the control crossbar. 
+ localparam NUM_REGS_PER_SLOT = 512/32; + localparam NUM_SLOTS = 1 /*this*/ + NUM_STREAM_ENDPOINTS + NUM_BLOCKS; + localparam BLOCK_OFFSET = 1 /*this*/ + NUM_STREAM_ENDPOINTS; + + reg [31:0] config_arr_2d [0:NUM_SLOTS-1][0:NUM_REGS_PER_SLOT-1]; + wire [31:0] status_arr_2d [0:NUM_SLOTS-1][0:NUM_REGS_PER_SLOT-1]; + + genvar b, i; + generate + for (b = 0; b < NUM_BLOCKS; b=b+1) begin + for (i = 0; i < NUM_REGS_PER_SLOT; i=i+1) begin + assign rfnoc_core_config[(b*512)+(i*32) +: 32] = config_arr_2d[b+BLOCK_OFFSET][i]; + assign status_arr_2d[b+BLOCK_OFFSET][i] = rfnoc_core_status[(b*512)+(i*32) +: 32]; + end + end + endgenerate + + integer m, n; + always @(posedge core_ctrl_clk) begin + if (core_ctrl_rst) begin + blk_resp_ack <= 1'b0; + for (m = 0; m < NUM_SLOTS; m = m + 1) begin + for (n = 0; n < NUM_REGS_PER_SLOT; n = n + 1) begin + config_arr_2d[m][n] <= BEC_DEFAULT_VAL[(n*32)+:32]; + end + end + end else begin + // All transactions finish in 1 cycle + blk_resp_ack <= blk_req_wr | blk_req_rd; + // Handle register writes + if (blk_req_wr) begin + config_arr_2d[req_addr[$clog2(NUM_SLOTS)+5:6]][req_addr[5:2]] <= req_data; + end + // Handle register reads + if (blk_req_rd) begin + blk_resp_data <= status_arr_2d[req_addr[$clog2(NUM_SLOTS)+5:6]][req_addr[5:2]]; + end + end + end + + // Global Registers + localparam [3:0] REG_GLOBAL_PROTOVER = 4'd0; // Offset = 0x00 + localparam [3:0] REG_GLOBAL_PORT_CNT = 4'd1; // Offset = 0x04 + localparam [3:0] REG_GLOBAL_EDGE_CNT = 4'd2; // Offset = 0x08 + localparam [3:0] REG_GLOBAL_DEVICE_INFO = 4'd3; // Offset = 0x0C + localparam [3:0] REG_GLOBAL_ENDPOINT_CTRL_CNT = 4'd4; // Offset = 0x10 + + // Clock-crossing for device_id. + // FIFO going from core_chdr_clk domain to core_ctrl_clk. 
+ wire device_id_fifo_ovalid; + wire [15:0] device_id_fifo_odata; + axi_fifo_2clk # ( + .WIDTH (16), + .SIZE (2) + ) device_id_fifo_i ( + .reset (1'b0), + .i_aclk (core_chdr_clk), + .i_tdata (device_id), + .i_tvalid (1'b1), + .i_tready (), + .o_aclk (core_ctrl_clk), + .o_tdata (device_id_fifo_odata), + .o_tvalid (device_id_fifo_ovalid), + .o_tready (1'b1) + ); + // Register the FIFO's output to always have valid data available. + reg [15:0] device_id_ctrl_clk = 16'h0; + always @(posedge core_ctrl_clk) begin + if (device_id_fifo_ovalid) begin + device_id_ctrl_clk <= device_id_fifo_odata; + end + end + + // Signature and protocol version + assign status_arr_2d[RFNOC_CORE_PORT_ID][REG_GLOBAL_PROTOVER] = {16'h12C6, PROTOVER[15:0]}; + + // Global port count register + localparam [0:0] STATIC_ROUTER_PRESENT = (NUM_EDGES == 12'd0) ? 1'b0 : 1'b1; + assign status_arr_2d[RFNOC_CORE_PORT_ID][REG_GLOBAL_PORT_CNT] = + {STATIC_ROUTER_PRESENT, CHDR_XBAR_PRESENT, + NUM_TRANSPORTS[9:0], NUM_BLOCKS[9:0], NUM_STREAM_ENDPOINTS[9:0]}; + // Global edge count register + assign status_arr_2d[RFNOC_CORE_PORT_ID][REG_GLOBAL_EDGE_CNT] = {20'd0, NUM_EDGES[11:0]}; + // Device information + assign status_arr_2d[RFNOC_CORE_PORT_ID][REG_GLOBAL_DEVICE_INFO] = {DEVICE_TYPE, device_id_ctrl_clk}; + // Number of stream endpoint connected to the ctrl crossbar + assign status_arr_2d[RFNOC_CORE_PORT_ID][REG_GLOBAL_ENDPOINT_CTRL_CNT] = {22'b0, NUM_ENDPOINTS_CTRL[9:0]}; + + // ----------------------------------- + // Connections Address Space + // ----------------------------------- + + // All inter-block static connections must be stored in a memory + // file which will be used to initialize a ROM that can be read + // by software for topology discovery. The format of the memory + // must be as follows: + // * Word Width: 32 bits + // * Maximum Depth: 16384 entries + // * Layout: + // - 0x000 : HEADER + // - 0x001 : EDGE_0_DEF + // - 0x002 : EDGE_1_DEF + // ... 
+ // - 0xFFF : EDGE_4094_DEF + // + // where: + // * HEADER = {18'd0, NumEntries[13:0]} + // * EDGE_<N>_DEF = {SrcBlkIndex[9:0], SrcBlkPort[5:0], DstBlkIndex[9:0], DstBlkPort[5:0]} + // + // The BlkIndex is the port number of the block on the control crossbar and the BlkPort is + // the index of the input or output port of the block. + + generate if (EDGE_TBL_FILE == "" || NUM_EDGES == 0) begin + // If no file is specified or if the number of edges is zero + // then just return zero for all transactions + always @(posedge core_ctrl_clk) begin + con_resp_ack <= (con_req_wr | con_req_rd); + con_resp_data <= 32'h0; + end + end else begin + // Initialize ROM from file and read it during a reg transaction + reg [31:0] edge_tbl_rom[0:NUM_EDGES]; + initial begin + $readmemh(EDGE_TBL_FILE, edge_tbl_rom, 0, NUM_EDGES); + end + always @(posedge core_ctrl_clk) begin + con_resp_ack <= (con_req_wr | con_req_rd); + con_resp_data <= edge_tbl_rom[req_addr[$clog2(NUM_EDGES+1)+1:2]]; + end + end endgenerate + +endmodule // rfnoc_core_kernel + diff --git a/fpga/usrp3/lib/rfnoc/counter.v b/fpga/usrp3/lib/rfnoc/counter.v new file mode 100644 index 000000000..f3480aaf6 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/counter.v @@ -0,0 +1,37 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Simple counter, reset by i_tlast on input side. i_tdata not connected +// Most useful for indexing a RAM, creating a ramp, etc. 
+ +module counter + #(parameter WIDTH=16) + (input clk, input reset, input clear, + input [WIDTH:0] max, + input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + reg [WIDTH-1:0] count; + + wire do_it = o_tready & i_tvalid; + wire done = (count >= (max-1)); + + always @(posedge clk) + if(reset | clear) + count <= 0; + else + if(do_it) + if( done | i_tlast ) + count <= 0; + else + count <= count + 1; + + assign o_tdata = count; + assign o_tlast = done | i_tlast; + assign o_tvalid = i_tvalid; + assign i_tready = do_it; + +endmodule // counter diff --git a/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs new file mode 100644 index 000000000..6fa49cd04 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs @@ -0,0 +1,25 @@ +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# Crossbar Sources +################################################## +RFNOC_XBAR_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/crossbar/, \ +axis_ctrl_crossbar_2d_mesh.v \ +axis_ctrl_crossbar_nxn.v \ +torus_2d_dor_router_single_sw.v \ +mesh_2d_dor_router_single_sw.v \ +axis_ingress_vc_buff.v \ +axis_switch.v \ +axis_port_terminator.v \ +chdr_crossbar_nxn.v \ +chdr_xb_ingress_buff.v \ +chdr_xb_routing_table.v \ +)) + +# Unused sources +# torus_2d_dor_router_multi_sw.v \ +# mesh_2d_dor_router_multi_sw.v \ diff --git a/fpga/usrp3/lib/rfnoc/crossbar/README.pdf b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf Binary files differnew file mode 100644 index 000000000..838702bd1 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v new file mode 100644 index 000000000..e69bdfe3c --- /dev/null +++ 
b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v @@ -0,0 +1,288 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_2d_mesh +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers along one dimension (# Nodes = DIM_SIZE * DIM_SIZE) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. 
Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_2d_mesh #( + parameter DIM_SIZE = 4, + parameter WIDTH = 64, + parameter TOPOLOGY = "MESH", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] s_axis_tdata, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tlast, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tvalid, + output wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tready, + // Output + output wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] m_axis_tdata, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tlast, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tvalid, + input wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + + `include "mesh_node_mapping.vh" + + //------------------------------------------------------- + // Unflatten input and output ports + //------------------------------------------------------- + + wire [WIDTH-1:0] i_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] o_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire clear_routers = deadlock_detected; + + genvar p,x,y; + generate + for (p = 0; p < DIM_SIZE*DIM_SIZE; p=p+1) begin + assign i_tdata_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tdata[p*WIDTH +: WIDTH]; + assign 
i_tlast_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tlast[p]; + assign i_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tvalid[p]; + assign s_axis_tready[p] = i_tready_arr[node_to_ydst(p)][node_to_xdst(p)] | clear_routers; + + assign m_axis_tdata[p*WIDTH +: WIDTH] = o_tdata_arr[node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tlast[p] = o_tlast_arr [node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tvalid[p] = o_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] & ~clear_routers; + assign o_tready_arr[node_to_ydst(p)][node_to_xdst(p)] = m_axis_tready[p]; + end + endgenerate + + //------------------------------------------------------- + // Instantiate routers + //------------------------------------------------------- + + wire [WIDTH-1:0] e2w_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] w2e_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] n2s_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] s2n_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + localparam N = DIM_SIZE; + localparam NEND = DIM_SIZE - 1; + localparam [WIDTH-1:0] ZERO = {WIDTH{1'b0}}; + + generate + for (y = 0; y < DIM_SIZE; y=y+1) begin: ydim + for (x = 0; x < 
DIM_SIZE; x=x+1) begin: xdim + if (TOPOLOGY == "MESH") begin + mesh_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_ter_tdata (i_tdata_arr [y][x]), + .s_axis_ter_tlast (i_tlast_arr [y][x]), + .s_axis_ter_tvalid (i_tvalid_arr[y][x]), + .s_axis_ter_tready (i_tready_arr[y][x]), + .m_axis_ter_tdata (o_tdata_arr [y][x]), + .m_axis_ter_tlast (o_tlast_arr [y][x]), + .m_axis_ter_tvalid (o_tvalid_arr[y][x]), + .m_axis_ter_tready (o_tready_arr[y][x]), + // West connections + .s_axis_wst_tdata ((x != 0) ? e2w_tdata_arr [y][x] : ZERO), + .s_axis_wst_tdest ((x != 0) ? e2w_tdest_arr [y][x] : 1'b0), + .s_axis_wst_tlast ((x != 0) ? e2w_tlast_arr [y][x] : 1'b0), + .s_axis_wst_tvalid ((x != 0) ? e2w_tvalid_arr[y][x] : 1'b0), + .s_axis_wst_tready ( e2w_tready_arr[y][x] ), + .m_axis_wst_tdata ( w2e_tdata_arr [y][(x+N-1)%N] ), + .m_axis_wst_tdest ( w2e_tdest_arr [y][(x+N-1)%N] ), + .m_axis_wst_tlast ( w2e_tlast_arr [y][(x+N-1)%N] ), + .m_axis_wst_tvalid ( w2e_tvalid_arr[y][(x+N-1)%N] ), + .m_axis_wst_tready ((x != 0) ? w2e_tready_arr[y][(x+N-1)%N] : 1'b1), + // East connections + .s_axis_est_tdata ((x != NEND) ? w2e_tdata_arr [y][x] : ZERO), + .s_axis_est_tdest ((x != NEND) ? w2e_tdest_arr [y][x] : 1'b0), + .s_axis_est_tlast ((x != NEND) ? w2e_tlast_arr [y][x] : 1'b0), + .s_axis_est_tvalid ((x != NEND) ? w2e_tvalid_arr[y][x] : 1'b0), + .s_axis_est_tready ( w2e_tready_arr[y][x] ), + .m_axis_est_tdata ( e2w_tdata_arr [y][(x+1)%N] ), + .m_axis_est_tdest ( e2w_tdest_arr [y][(x+1)%N] ), + .m_axis_est_tlast ( e2w_tlast_arr [y][(x+1)%N] ), + .m_axis_est_tvalid ( e2w_tvalid_arr[y][(x+1)%N] ), + .m_axis_est_tready ((x != NEND) ? 
e2w_tready_arr[y][(x+1)%N] : 1'b1), + // North connections + .s_axis_nor_tdata ((y != 0) ? s2n_tdata_arr [y][x] : ZERO), + .s_axis_nor_tdest ((y != 0) ? s2n_tdest_arr [y][x] : 1'b0), + .s_axis_nor_tlast ((y != 0) ? s2n_tlast_arr [y][x] : 1'b0), + .s_axis_nor_tvalid ((y != 0) ? s2n_tvalid_arr[y][x] : 1'b0), + .s_axis_nor_tready ( s2n_tready_arr[y][x] ), + .m_axis_nor_tdata ( n2s_tdata_arr [(y+N-1)%N][x] ), + .m_axis_nor_tdest ( n2s_tdest_arr [(y+N-1)%N][x] ), + .m_axis_nor_tlast ( n2s_tlast_arr [(y+N-1)%N][x] ), + .m_axis_nor_tvalid ( n2s_tvalid_arr[(y+N-1)%N][x] ), + .m_axis_nor_tready ((y != 0) ? n2s_tready_arr[(y+N-1)%N][x] : 1'b1), + // South connections + .s_axis_sou_tdata ((y != NEND) ? n2s_tdata_arr [y][x] : ZERO), + .s_axis_sou_tdest ((y != NEND) ? n2s_tdest_arr [y][x] : 1'b0), + .s_axis_sou_tlast ((y != NEND) ? n2s_tlast_arr [y][x] : 1'b0), + .s_axis_sou_tvalid ((y != NEND) ? n2s_tvalid_arr[y][x] : 1'b0), + .s_axis_sou_tready ( n2s_tready_arr[y][x] ), + .m_axis_sou_tdata ( s2n_tdata_arr [(y+1)%N][x] ), + .m_axis_sou_tdest ( s2n_tdest_arr [(y+1)%N][x] ), + .m_axis_sou_tlast ( s2n_tlast_arr [(y+1)%N][x] ), + .m_axis_sou_tvalid ( s2n_tvalid_arr[(y+1)%N][x] ), + .m_axis_sou_tready ((y != NEND) ? 
s2n_tready_arr[(y+1)%N][x] : 1'b1) + ); + end else begin + torus_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_term_tdata (i_tdata_arr [y][x]), + .s_axis_term_tlast (i_tlast_arr [y][x]), + .s_axis_term_tvalid (i_tvalid_arr[y][x]), + .s_axis_term_tready (i_tready_arr[y][x]), + .m_axis_term_tdata (o_tdata_arr [y][x]), + .m_axis_term_tlast (o_tlast_arr [y][x]), + .m_axis_term_tvalid (o_tvalid_arr[y][x]), + .m_axis_term_tready (o_tready_arr[y][x]), + // X-dim connections + .s_axis_xdim_tdata (e2w_tdata_arr [y][x] ), + .s_axis_xdim_tdest (e2w_tdest_arr [y][x] ), + .s_axis_xdim_tlast (e2w_tlast_arr [y][x] ), + .s_axis_xdim_tvalid (e2w_tvalid_arr[y][x] ), + .s_axis_xdim_tready (e2w_tready_arr[y][x] ), + .m_axis_xdim_tdata (e2w_tdata_arr [y][(x+1)%N]), + .m_axis_xdim_tdest (e2w_tdest_arr [y][(x+1)%N]), + .m_axis_xdim_tlast (e2w_tlast_arr [y][(x+1)%N]), + .m_axis_xdim_tvalid (e2w_tvalid_arr[y][(x+1)%N]), + .m_axis_xdim_tready (e2w_tready_arr[y][(x+1)%N]), + // Y-dim connections + .s_axis_ydim_tdata (s2n_tdata_arr [y][x] ), + .s_axis_ydim_tdest (s2n_tdest_arr [y][x] ), + .s_axis_ydim_tlast (s2n_tlast_arr [y][x] ), + .s_axis_ydim_tvalid (s2n_tvalid_arr[y][x] ), + .s_axis_ydim_tready (s2n_tready_arr[y][x] ), + .m_axis_ydim_tdata (s2n_tdata_arr [(y+1)%N][x]), + .m_axis_ydim_tdest (s2n_tdest_arr [(y+1)%N][x]), + .m_axis_ydim_tlast (s2n_tlast_arr [(y+1)%N][x]), + .m_axis_ydim_tvalid (s2n_tvalid_arr[(y+1)%N][x]), + .m_axis_ydim_tready (s2n_tready_arr[(y+1)%N][x]) + ); + end + end + end + endgenerate + + //------------------------------------------------------- + // Deadlock detector + //------------------------------------------------------- + // A deadlock is defined on an AXIS 
bus as an extended period + // where tvalid=1 but tready=0. If at least one slave port is in + // this state and none of the master ports are then this router + // will go into a failsafe deadlock recovery mode. The DEADLOCK_TIMEOUT + // parameter defines the duration for which this condition has + // to be true. In deadlock recovery mode, all routers are held in reset + // (thus losing all packets in flight) and all input ports are flushed. + + wire m_locked = |(m_axis_tvalid & ~m_axis_tready); + wire s_locked = |(s_axis_tvalid & ~s_axis_tready); + + // A counter that tracks the duration for which the router is livelocked + // If the livelock duration is higher than DEADLOCK_TIMEOUT then it is a + // deadlock + reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_counter = DEADLOCK_TIMEOUT-1; + always @(posedge clk) begin + if (reset | ~(s_locked & ~m_locked)) begin + deadlock_counter <= DEADLOCK_TIMEOUT-1; + end else if (deadlock_counter != 'd0) begin + deadlock_counter <= deadlock_counter - 1; + end + end + + // A counter that tracks the deadlock recovery period. If the slave ports + // have no activity for DEADLOCK_TIMEOUT cycles then the router can + // successfully come out of the deadlocked state. 
+ reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_recover_counter = 'd0; + always @(posedge clk) begin + if (reset) begin + deadlock_recover_counter <= 'd0; + end else if (deadlock_detected) begin + if (|s_axis_tvalid) + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + else + deadlock_recover_counter <= deadlock_recover_counter - 1; + end else if (deadlock_counter == 'd0) begin + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + end + end + assign deadlock_detected = (deadlock_recover_counter != 0); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v new file mode 100644 index 000000000..6de082b4c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v @@ -0,0 +1,130 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_nxn +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - NPORTS: Number of ports (maximum 1024) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. 
Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_nxn #( + parameter WIDTH = 32, + parameter NPORTS = 10, + parameter TOPOLOGY = "TORUS", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(NPORTS*WIDTH)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(NPORTS*WIDTH)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + + function integer csqrt_max1024; + input integer value; + integer i; + begin + csqrt_max1024 = 1; + for (i = 1; i <= 32; i = i + 1) // sqrt(1024) = 32 + csqrt_max1024 = csqrt_max1024 + (i*i < value ? 1 : 0); + end + endfunction + + localparam integer DIM_SIZE = csqrt_max1024(NPORTS); + + wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] i_tdata, o_tdata ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tlast, o_tlast ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tvalid, o_tvalid; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tready, o_tready; + + // axis_ctrl_crossbar_2d_mesh needs to scale up in squares + // i.e. 4, 9, 16, 25, ... but NPORTS can be any number, so + // instantiate the next highest square number of ports and + // terminate the rest. 
+ axis_ctrl_crossbar_2d_mesh #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .TOPOLOGY (TOPOLOGY), + .INGRESS_BUFF_SIZE(INGRESS_BUFF_SIZE), + .ROUTER_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC), + .DEADLOCK_TIMEOUT (DEADLOCK_TIMEOUT) + ) router_dut_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (i_tdata), + .s_axis_tlast (i_tlast), + .s_axis_tvalid (i_tvalid), + .s_axis_tready (i_tready), + .m_axis_tdata (o_tdata), + .m_axis_tlast (o_tlast), + .m_axis_tvalid (o_tvalid), + .m_axis_tready (o_tready), + .deadlock_detected(deadlock_detected) + ); + + // Connect the bottom NPORTS to the IO + assign i_tdata[(NPORTS*WIDTH)-1:0] = s_axis_tdata; + assign i_tlast[NPORTS-1:0] = s_axis_tlast; + assign i_tvalid[NPORTS-1:0] = s_axis_tvalid; + assign s_axis_tready = i_tready[NPORTS-1:0]; + + assign m_axis_tdata = o_tdata[(NPORTS*WIDTH)-1:0]; + assign m_axis_tlast = o_tlast[NPORTS-1:0]; + assign m_axis_tvalid = o_tvalid[NPORTS-1:0]; + assign o_tready[NPORTS-1:0] = m_axis_tready; + + // Terminate the rest + genvar i; + generate for (i = NPORTS; i < (DIM_SIZE*DIM_SIZE); i = i + 1) begin: ports + axis_port_terminator #(.DATA_W(WIDTH)) term_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (o_tdata[(i*WIDTH)+:WIDTH]), + .s_axis_tlast (o_tlast[i]), + .s_axis_tvalid(o_tvalid[i]), + .s_axis_tready(o_tready[i]), + .m_axis_tdata (i_tdata[(i*WIDTH)+:WIDTH]), + .m_axis_tlast (i_tlast[i]), + .m_axis_tvalid(i_tvalid[i]), + .m_axis_tready(i_tready[i]), + .pkts_dropped () + ); + end endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v new file mode 100644 index 000000000..fd10d6682 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v @@ -0,0 +1,178 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ingress_vc_buff +// Description: +// A 
wrapper around a buffer to implement one or more virtual channels +// Supports gating a packet (axi_packet_gate) for cut-through routing + +module axis_ingress_vc_buff #( + parameter WIDTH = 64, // Width of the datapath + parameter NUM_VCS = 2, // Number of virtual channels + parameter SIZE = 5, // Virtual channel buffer size + parameter ROUTING = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}; any value other than WORMHOLE selects the packet gate + parameter DEST_W = (NUM_VCS > 1) ? $clog2(NUM_VCS) : 1 // PRIVATE +) ( + input wire clk, + input wire reset, + input wire [WIDTH-1:0] s_axis_tdata, + input wire [DEST_W-1:0] s_axis_tdest, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + output wire [WIDTH-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready +); + + generate if (NUM_VCS > 1) begin + //---------------------------------------------------- + // Multiple virtual channels (one buffer per VC, selected by s_axis_tdest) + //---------------------------------------------------- + + wire [(WIDTH*NUM_VCS)-1:0] bufin_tdata , bufout_tdata ; + wire [NUM_VCS-1:0] bufin_tlast , bufout_tlast ; + wire [NUM_VCS-1:0] bufin_tvalid, bufout_tvalid; + wire [NUM_VCS-1:0] bufin_tready, bufout_tready; + + axi_demux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) vc_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (/* unused */), + .dest (s_axis_tdest ), + .i_tdata (s_axis_tdata ), + .i_tlast (s_axis_tlast ), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .o_tdata (bufin_tdata), + .o_tlast (bufin_tlast), + .o_tvalid (bufin_tvalid), + .o_tready (bufin_tready) + ); + + genvar vc; + for (vc = 0; vc < NUM_VCS; vc = vc + 1) begin + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({bufin_tlast[vc], bufin_tdata [(vc*WIDTH)+:WIDTH]}), + .i_tvalid (bufin_tvalid [vc]), + .i_tready (bufin_tready [vc]), + .o_tdata 
({bufout_tlast[vc], bufout_tdata [(vc*WIDTH)+:WIDTH]}), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufin_tdata[(vc*WIDTH)+:WIDTH]), + .i_tlast (bufin_tlast[vc]), + .i_tvalid (bufin_tvalid[vc]), + .i_tready (bufin_tready[vc]), + .i_terror (1'b0), + .o_tdata (bufout_tdata[(vc*WIDTH)+:WIDTH]), + .o_tlast (bufout_tlast[vc]), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]) + ); + end + end + + axi_mux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) vc_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufout_tdata ), + .i_tlast (bufout_tlast ), + .i_tvalid (bufout_tvalid), + .i_tready (bufout_tready), + .o_tdata (m_axis_tdata ), + .o_tlast (m_axis_tlast ), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready) + ); + + end else begin + //---------------------------------------------------- + // Single virtual channel (s_axis_tdest is not used in this branch) + //---------------------------------------------------- + wire [WIDTH-1:0] pipe_tdata; + wire pipe_tlast; + wire pipe_tvalid; + wire pipe_tready; + + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({s_axis_tlast, s_axis_tdata}), + .i_tvalid (s_axis_tvalid ), + .i_tready (s_axis_tready ), + .o_tdata ({pipe_tlast, pipe_tdata}), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_tdata), + .i_tlast (s_axis_tlast), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .i_terror (1'b0), + .o_tdata (pipe_tdata), + .o_tlast (pipe_tlast), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready) + ); + end + + axi_fifo #( // Output stage: carries pipe_* to m_axis_* + 
.WIDTH(WIDTH+1), .SIZE(1) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({pipe_tlast, pipe_tdata}), + .i_tvalid (pipe_tvalid ), + .i_tready (pipe_tready ), + .o_tdata ({m_axis_tlast, m_axis_tdata}), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready), + .space (), + .occupied () + ); + + end endgenerate + +endmodule + + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v new file mode 100644 index 000000000..bf9fa24be --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v @@ -0,0 +1,44 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_port_terminator +// Description: +// A dummy terminator for unused crossbar ports: accepts and discards all input, never drives valid output + +module axis_port_terminator #( + parameter DATA_W = 64 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Input ports + input wire [DATA_W-1:0] s_axis_tdata, // Input data (discarded) + input wire s_axis_tlast, // Input EOP (last) + input wire s_axis_tvalid, // Input valid + output wire s_axis_tready, // Input ready + // Output ports + output wire [DATA_W-1:0] m_axis_tdata, // Output data + output wire m_axis_tlast, // Output EOP (last) + output wire m_axis_tvalid, // Output valid + input wire m_axis_tready, // Output ready (unused) + // Metrics + output reg [15:0] pkts_dropped // Number of packets discarded (16-bit, wraps on overflow) +); + + assign s_axis_tready = 1'b1; // Never backpressure: every input word is accepted + assign m_axis_tdata = {DATA_W{1'b0}}; + assign m_axis_tlast = 1'b0; + assign m_axis_tvalid = 1'b0; // Never source any data + + always @(posedge clk) begin + if (reset) begin + pkts_dropped <= 'd0; + end else if (s_axis_tvalid & s_axis_tlast & s_axis_tready) begin // Last word of a packet was accepted + pkts_dropped <= pkts_dropped + 'd1; + end + end + +endmodule + + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v new file mode 100644 index 000000000..24b9e4129 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v @@ -0,0 +1,164 @@
+// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_switch +// Description: +// Implementation of an M-input, N-output AXI-Stream switch. +// One of the M input ports is allocated based on the s_axis_alloc signal +// and the packet on that port is sent to one of the N output ports based +// on the low $clog2(OUT_PORTS) bits of the tdest signal + +module axis_switch #( + parameter DATA_W = 64, // tdata width + parameter DEST_W = 1, // Output tdest width + parameter IN_PORTS = 3, // Number of input ports + parameter OUT_PORTS = 3, // Number of output ports + parameter PIPELINE = 1, // Instantiate output pipeline stage? + parameter ALLOC_W = (IN_PORTS == 1) ? 1 : $clog2(IN_PORTS) //PRIVATE +) ( + // Clocks and resets + input wire clk, // Switch clock + input wire reset, // Reset + // Input ports + input wire [(DATA_W*IN_PORTS)-1:0] s_axis_tdata, // Input data + input wire [((DEST_W+$clog2(OUT_PORTS))*IN_PORTS)-1:0] s_axis_tdest, // Input destination, per input: {tdest forwarded to the output, output port index} + input wire [IN_PORTS-1:0] s_axis_tlast, // Input EOP (last) + input wire [IN_PORTS-1:0] s_axis_tvalid, // Input valid + output wire [IN_PORTS-1:0] s_axis_tready, // Input ready + input wire [ALLOC_W-1:0] s_axis_alloc, // Input port allocation for switch + // Output ports + output wire [(DATA_W*OUT_PORTS)-1:0] m_axis_tdata, // Output data + output wire [(DEST_W*OUT_PORTS)-1:0] m_axis_tdest, // Output destination + output wire [OUT_PORTS-1:0] m_axis_tlast, // Output EOP (last) + output wire [OUT_PORTS-1:0] m_axis_tvalid, // Output valid + input wire [OUT_PORTS-1:0] m_axis_tready // Output ready +); + // PRIVATE: Vivado synthesizer workaround (cannot be localparam) + localparam CLOG2_IN_PORTS = $clog2(IN_PORTS); + localparam CLOG2_OUT_PORTS = $clog2(OUT_PORTS); + + //--------------------------------------------------------- + // Flatten/unflatten and pipeline + //--------------------------------------------------------- + wire [DATA_W-1:0] i_tdata [0:IN_PORTS-1]; + wire 
[DEST_W+$clog2(OUT_PORTS)-1:0] i_tdest [0:IN_PORTS-1]; + wire i_tlast [0:IN_PORTS-1]; + wire [IN_PORTS-1:0] i_tvalid; + wire [IN_PORTS-1:0] i_tready; + wire [ALLOC_W-1:0] i_alloc; + wire [DATA_W-1:0] o_tdata [0:OUT_PORTS-1]; + wire [DEST_W-1:0] o_tdest [0:OUT_PORTS-1]; + wire o_tlast [0:OUT_PORTS-1]; + wire [OUT_PORTS-1:0] o_tvalid; + wire [OUT_PORTS-1:0] o_tready; + + genvar i, o; + generate + for (i = 0; i < IN_PORTS; i = i + 1) begin: in_ports + assign i_tdata [i] = s_axis_tdata [(i*DATA_W)+:DATA_W]; + assign i_tdest [i] = s_axis_tdest [(i*(DEST_W+CLOG2_OUT_PORTS))+:(DEST_W+CLOG2_OUT_PORTS)]; + assign i_tlast [i] = s_axis_tlast [i]; + assign i_tvalid [i] = s_axis_tvalid[i]; + assign s_axis_tready[i] = i_tready [i]; + end + assign i_alloc = s_axis_alloc; // i_alloc has to be delay matched to valid + + for (o = 0; o < OUT_PORTS; o = o + 1) begin + if (PIPELINE == 1) begin // Insert an axi_fifo_flop2 stage on every output + axi_fifo_flop2 #(.WIDTH(DEST_W+1+DATA_W)) out_pipe_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({o_tdest[o], o_tlast[o], o_tdata[o]}), + .i_tvalid(o_tvalid[o]), .i_tready(o_tready[o]), + .o_tdata({m_axis_tdest[(o*DEST_W)+:DEST_W], m_axis_tlast[o], m_axis_tdata[(o*DATA_W)+:DATA_W]}), + .o_tvalid(m_axis_tvalid[o]), .o_tready(m_axis_tready[o]), + .space(), .occupied() + ); + end else begin // No pipeline stage: outputs are wired straight through + assign m_axis_tdata [(o*DATA_W)+:DATA_W] = o_tdata [o]; + assign m_axis_tdest [(o*DEST_W)+:DEST_W] = o_tdest [o]; + assign m_axis_tlast [o] = o_tlast [o]; + assign m_axis_tvalid[o] = o_tvalid [o]; + assign o_tready [o] = m_axis_tready[o]; + end + end + endgenerate + + //--------------------------------------------------------- + // Allocator + //--------------------------------------------------------- + // The "chosen" input port will drive this bus + wire [DATA_W-1:0] master_tdata; + wire [DEST_W+$clog2(OUT_PORTS)-1:0] master_tdest; + wire master_tlast; + wire master_tvalid; + wire master_tready; + + generate if (IN_PORTS > 1) begin + reg [IN_PORTS-1:0] ialloc_oh; // One-hot grant of the allocated input port (all zeros = no allocation) + reg [$clog2(IN_PORTS)-1:0] 
alloc_reg; + always @(posedge clk) begin + if (reset) begin + ialloc_oh <= {IN_PORTS{1'b0}}; + end else begin + if (ialloc_oh == {IN_PORTS{1'b0}}) begin + if (|i_tvalid) begin + ialloc_oh[i_alloc] <= 1'b1; + alloc_reg <= i_alloc; + end + end else begin + if(master_tready & master_tvalid & master_tlast) + ialloc_oh <= {IN_PORTS{1'b0}}; + end + end + end + + assign master_tdata = i_tdata[alloc_reg]; + assign master_tdest = i_tdest[alloc_reg]; + assign master_tlast = i_tlast[alloc_reg]; + assign master_tvalid = |(i_tvalid & ialloc_oh); + assign i_tready = i_tvalid & ialloc_oh & {IN_PORTS{master_tready}}; + end else begin + // Special case: One input port (s_axis_alloc is not used) + assign master_tdata = i_tdata[0]; + assign master_tdest = i_tdest[0]; + assign master_tlast = i_tlast[0]; + assign master_tvalid = i_tvalid[0]; + assign i_tready[0] = master_tready; + end endgenerate + + //--------------------------------------------------------- + // Router + //--------------------------------------------------------- + generate if (OUT_PORTS > 1) begin + reg [OUT_PORTS-1:0] odst_oh; // One-hot select of the destination output (all zeros = none latched) + always @(posedge clk) begin + if (reset) begin + odst_oh <= {OUT_PORTS{1'b0}}; + end else begin + if (odst_oh == {OUT_PORTS{1'b0}}) begin + if (master_tvalid) // Latch the destination once a packet is presented + odst_oh[master_tdest[CLOG2_OUT_PORTS-1:0]] <= 1'b1; + end else begin + if(master_tready & master_tvalid & master_tlast) // Release after the last word is transferred + odst_oh <= {OUT_PORTS{1'b0}}; + end + end + end + assign master_tready = |(o_tready & odst_oh); + assign o_tvalid = {OUT_PORTS{master_tvalid}} & odst_oh; + end else begin + // Special case: One output port + assign master_tready = o_tready[0]; + assign o_tvalid[0] = master_tvalid; + end endgenerate + + generate for (o = 0; o < OUT_PORTS; o = o + 1) begin + assign o_tdata[o] = master_tdata; // Data, dest and last fan out to every output; only o_tvalid is steered + assign o_tdest[o] = master_tdest[DEST_W+CLOG2_OUT_PORTS-1:CLOG2_OUT_PORTS]; // Drop the low bits used to pick the output port + assign o_tlast[o] = master_tlast; + end endgenerate + +endmodule + + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v
b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v new file mode 100644 index 000000000..79f1a6626 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v @@ -0,0 +1,381 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_crossbar_nxn +// Description: +// This module implements a full-bandwidth NxN crossbar with N input and output ports +// for CHDR traffic. It supports multiple optimization strategies for performance, +// area and timing tradeoffs. It uses AXI-Stream for all of its links. The crossbar +// has a dynamic routing table based on a Content Addressable Memory (CAM). The SID +// is used to determine the destination of a packet and the routing table contains +// a re-programmable SID to crossbar port mapping. The table is programmed using +// special route config packets on the data input ports or using an optional +// management port. +// The topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - CHDR_W: Width of the AXI-Stream data bus +// - NPORTS: Number of ports to instantiate +// - DEFAULT_PORT: The failsafe port to forward a packet to if the SID mapping is missing +// - MTU: log2 of max packet size (in words) +// - ROUTE_TBL_SIZE: log2 of the number of mappings that the routing table can hold +// at any time. Mapping values are maintained in a FIFO fashion. +// - MUX_ALLOC: Algorithm to allocate the egress MUX +// * PRIO: Priority based. 
Lower port numbers have a higher priority +// * ROUND-ROBIN: Round robin input port allocation +// - OPTIMIZE: Optimization strategy for performance vs area vs timing tradeoffs +// * AREA: Attempt to minimize area at the cost of performance (throughput) and/or timing +// * PERFORMANCE: Attempt to maximize performance at the cost of area and/or timing +// * TIMING: Attempt to maximize Fmax at the cost of area and/or performance +// - NPORTS_MGMT: Number of ports with management endpoint. The first NPORTS_MGMT ports will +// have the management port instantiated +// - EXT_RTCFG_PORT: Enable the side-channel management port (ext_rtcfg_*) to configure the +// routing table +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// - ext_rtcfg_*: Router config management port (stb/addr/data in, ack out) +// - device_id: The ID of the device that has instantiated this module +// + +module chdr_crossbar_nxn #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter CHDR_W = 64, + parameter [7:0] NPORTS = 8, + parameter [7:0] DEFAULT_PORT = 0, + parameter MTU = 9, + parameter ROUTE_TBL_SIZE = 6, + parameter MUX_ALLOC = "ROUND-ROBIN", + parameter OPTIMIZE = "AREA", + parameter [7:0] NPORTS_MGMT = NPORTS, + parameter [0:0] EXT_RTCFG_PORT = 0 +) ( + input wire clk, + input wire reset, + // Device info + input wire [15:0] device_id, + // Inputs + input wire [(CHDR_W*NPORTS)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(CHDR_W*NPORTS)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Router config management port + input wire ext_rtcfg_stb, + input wire [15:0] ext_rtcfg_addr, + input wire [31:0] ext_rtcfg_data, + output wire ext_rtcfg_ack +); + // --------------------------------------------------- + // RFNoC Includes + // 
--------------------------------------------------- + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + localparam NPORTS_W = $clog2(NPORTS); + localparam EPID_W = 16; + localparam [17:0] EXT_INFO = {1'b0, EXT_RTCFG_PORT, NPORTS_MGMT, NPORTS}; + + localparam [0:0] PKT_ST_HEAD = 1'b0; + localparam [0:0] PKT_ST_BODY = 1'b1; + + // The compute_mux_alloc function is the switch allocation function for the MUX + // i.e. it chooses which input port reserves the output MUX for packet transfer. + function [NPORTS_W-1:0] compute_mux_alloc; + input [NPORTS-1:0] pkt_waiting; + input [NPORTS_W-1:0] last_alloc; + reg signed [NPORTS_W:0] i; + begin + compute_mux_alloc = last_alloc; // Default: keep the previous allocation if no port is waiting + for (i = NPORTS-1; i >= 0; i=i-1) begin // Descending scan: the match with the lowest i is assigned last and wins + if (MUX_ALLOC == "PRIO") begin + // Priority. Lower port index gets a higher priority. + if (pkt_waiting[i]) + compute_mux_alloc = i; + end else begin + // Round-robin: the port right after last_alloc is considered first + if (pkt_waiting[(last_alloc + i + 1) % NPORTS]) + compute_mux_alloc = (last_alloc + i + 1) % NPORTS; + end + end + end + endfunction + + wire [NPORTS-1:0] rtcfg_req_wr; + wire [(16*NPORTS)-1:0] rtcfg_req_addr; + wire [(32*NPORTS)-1:0] rtcfg_req_data; + wire [NPORTS-1:0] rtcfg_resp_ack; + wire [(EPID_W*NPORTS)-1:0] find_tdata; + wire [NPORTS-1:0] find_tvalid; + wire [NPORTS-1:0] find_tready; + wire [(NPORTS_W*NPORTS)-1:0] result_tdata; + wire [NPORTS-1:0] result_tkeep; + wire [NPORTS-1:0] result_tvalid; + wire [NPORTS-1:0] result_tready; + + // Instantiate a single CAM-based routing table that will be shared between all + // input ports. Configuration and lookup is performed using an AXI-Stream iface. + // If multiple packets arrive simultaneously, only the headers of those packets will + // be serialized in order to arbitrate this map. Selection is done round-robin. 
+ chdr_xb_routing_table #( + .SIZE(ROUTE_TBL_SIZE), .NPORTS(NPORTS), + .EXT_INS_PORT_EN(EXT_RTCFG_PORT) + ) routing_tbl_i ( + .clk (clk ), + .reset (reset ), + .port_req_wr (rtcfg_req_wr ), + .port_req_addr (rtcfg_req_addr), + .port_req_data (rtcfg_req_data), + .port_resp_ack (rtcfg_resp_ack), + .ext_req_wr (ext_rtcfg_stb ), + .ext_req_addr (ext_rtcfg_addr), + .ext_req_data (ext_rtcfg_data), + .ext_resp_ack (ext_rtcfg_ack ), + .axis_find_tdata (find_tdata ), + .axis_find_tvalid (find_tvalid ), + .axis_find_tready (find_tready ), + .axis_result_tdata (result_tdata ), + .axis_result_tkeep (result_tkeep ), + .axis_result_tvalid(result_tvalid ), + .axis_result_tready(result_tready ) + ); + + wire [CHDR_W-1:0] i_tdata [0:NPORTS-1]; + wire [9:0] i_tdest [0:NPORTS-1]; + wire [1:0] i_tid [0:NPORTS-1]; + wire i_tlast [0:NPORTS-1]; + wire i_tvalid [0:NPORTS-1]; + wire i_tready [0:NPORTS-1]; + wire [CHDR_W-1:0] buf_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] buf_tdest [0:NPORTS-1], buf_tdest_tmp[0:NPORTS-1]; + wire buf_tkeep [0:NPORTS-1]; + wire buf_tlast [0:NPORTS-1]; + wire buf_tvalid[0:NPORTS-1]; + wire buf_tready[0:NPORTS-1]; + wire [CHDR_W-1:0] swi_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] swi_tdest [0:NPORTS-1]; + wire swi_tlast [0:NPORTS-1]; + wire swi_tvalid[0:NPORTS-1]; + wire swi_tready[0:NPORTS-1]; + wire [(CHDR_W*NPORTS)-1:0] swo_tdata [0:NPORTS-1], muxi_tdata [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tlast [0:NPORTS-1], muxi_tlast [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tvalid[0:NPORTS-1], muxi_tvalid[0:NPORTS-1]; + wire [NPORTS-1:0] swo_tready[0:NPORTS-1], muxi_tready[0:NPORTS-1]; + + genvar n, i, j; + generate + for (n = 0; n < NPORTS; n = n + 1) begin: i_ports + // For each input port, first check if we have a management packet + // arriving. If it arrives, the top config commands are extracted, sent to the + // routing table for configuration, and the rest of the packet is forwarded + // down to the ingress buffer of + // the router. 
+ if (n < NPORTS_MGMT) begin + chdr_mgmt_pkt_handler #( + .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .MGMT_ONLY(0) + ) mgmt_ep_i ( + .clk (clk ), + .rst (reset ), + .node_info (chdr_mgmt_build_node_info(EXT_INFO, n, NODE_TYPE_XBAR, device_id)), + .s_axis_chdr_tdata (s_axis_tdata [(n*CHDR_W)+:CHDR_W] ), + .s_axis_chdr_tlast (s_axis_tlast [n] ), + .s_axis_chdr_tvalid (s_axis_tvalid[n] ), + .s_axis_chdr_tready (s_axis_tready[n] ), + .s_axis_chdr_tuser ('d0 ), + .m_axis_chdr_tdata (i_tdata [n] ), + .m_axis_chdr_tdest (i_tdest [n] ), + .m_axis_chdr_tid (i_tid [n] ), + .m_axis_chdr_tlast (i_tlast [n] ), + .m_axis_chdr_tvalid (i_tvalid [n] ), + .m_axis_chdr_tready (i_tready [n] ), + .ctrlport_req_wr (rtcfg_req_wr [n] ), + .ctrlport_req_rd (/* unused */ ), + .ctrlport_req_addr (rtcfg_req_addr[(n*16)+:16] ), + .ctrlport_req_data (rtcfg_req_data[(n*32)+:32] ), + .ctrlport_resp_ack (rtcfg_resp_ack[n] ), + .ctrlport_resp_data (32'h0 /* unused */ ), + .op_stb (/* unused */ ), + .op_dst_epid (/* unused */ ), + .op_src_epid (/* unused */ ), + .op_data (/* unused */ ) + ); + end else begin + assign i_tdata [n] = s_axis_tdata [(n*CHDR_W)+:CHDR_W]; + assign i_tid [n] = CHDR_MGMT_ROUTE_EPID; + assign i_tdest [n] = 10'd0; // Unused + assign i_tlast [n] = s_axis_tlast [n]; + assign i_tvalid [n] = s_axis_tvalid[n]; + assign s_axis_tready[n] = i_tready [n]; + + assign rtcfg_req_wr [n] = 1'b0; + assign rtcfg_req_addr[(n*16)+:16] = 16'h0; + assign rtcfg_req_data[(n*32)+:32] = 32'h0; + end + + // Ingress buffer module that does the following: + // - Stores and gates an incoming packet + // - Looks up destination in routing table and attaches a tdest for the packet + chdr_xb_ingress_buff #( + .WIDTH(CHDR_W), .MTU(MTU), .DEST_W(NPORTS_W), .NODE_ID(n) + ) buf_i ( + .clk (clk ), + .reset (reset ), + .s_axis_chdr_tdata (i_tdata [n] ), + .s_axis_chdr_tdest (i_tdest [n][NPORTS_W-1:0] ), + .s_axis_chdr_tid (i_tid [n] ), + .s_axis_chdr_tlast (i_tlast [n] ), + .s_axis_chdr_tvalid (i_tvalid [n] ), + 
.s_axis_chdr_tready (i_tready [n] ), + .m_axis_chdr_tdata (buf_tdata [n] ), + .m_axis_chdr_tdest (buf_tdest_tmp[n] ), + .m_axis_chdr_tkeep (buf_tkeep [n] ), + .m_axis_chdr_tlast (buf_tlast [n] ), + .m_axis_chdr_tvalid (buf_tvalid [n] ), + .m_axis_chdr_tready (buf_tready [n] ), + .m_axis_find_tdata (find_tdata [(n*EPID_W)+:EPID_W] ), + .m_axis_find_tvalid (find_tvalid [n] ), + .m_axis_find_tready (find_tready [n] ), + .s_axis_result_tdata (result_tdata [(n*NPORTS_W)+:NPORTS_W]), + .s_axis_result_tkeep (result_tkeep [n] ), + .s_axis_result_tvalid(result_tvalid[n] ), + .s_axis_result_tready(result_tready[n] ) + ); + assign buf_tdest[n] = buf_tkeep[n] ? buf_tdest_tmp[n] : DEFAULT_PORT[NPORTS_W-1:0]; + + // Pipeline stage + axi_fifo #( + .WIDTH(CHDR_W+1+NPORTS_W), .SIZE(1) + ) pipe_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata ({buf_tlast[n], buf_tdest[n], buf_tdata[n]}), + .i_tvalid (buf_tvalid[n] ), + .i_tready (buf_tready[n] ), + .o_tdata ({swi_tlast[n], swi_tdest[n], swi_tdata[n]}), + .o_tvalid (swi_tvalid[n] ), + .o_tready (swi_tready[n] ), + .space (/* Unused */ ), + .occupied (/* Unused */ ) + ); + + // Ingress demux. 
Use the tdest field to determine packet destination + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(1), .OUT_PORTS(NPORTS), .PIPELINE(1) + ) demux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (swi_tdata [n] ), + .s_axis_tdest ({1'b0, swi_tdest [n]}), + .s_axis_tlast (swi_tlast [n] ), + .s_axis_tvalid (swi_tvalid[n] ), + .s_axis_tready (swi_tready[n] ), + .s_axis_alloc (1'b0 ), + .m_axis_tdata (swo_tdata [n] ), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (swo_tlast [n] ), + .m_axis_tvalid (swo_tvalid[n] ), + .m_axis_tready (swo_tready[n] ) + ); + end + + for (i = 0; i < NPORTS; i = i + 1) begin + for (j = 0; j < NPORTS; j = j + 1) begin + assign muxi_tdata [i][j*CHDR_W+:CHDR_W] = swo_tdata [j][i*CHDR_W+:CHDR_W]; + assign muxi_tlast [i][j] = swo_tlast [j][i]; + assign muxi_tvalid[i][j] = swo_tvalid [j][i]; + assign swo_tready [i][j] = muxi_tready[j][i]; + end + end + + for (n = 0; n < NPORTS; n = n + 1) begin: o_ports + if (OPTIMIZE == "PERFORMANCE") begin + // Use the axis_switch module when optimizing for performance. + // This logic has some extra levels of logic to ensure + // that the switch allocation happens in 0 clock cycles which + // means that Fmax for this implementation will be lower. + + wire mux_ready = |muxi_tready[n]; // Max 1 bit should be high + wire mux_valid = |muxi_tvalid[n]; + wire mux_last = |(muxi_tvalid[n] & muxi_tlast[n]); + + // Track the input packet state (HEAD until a first word is transferred, back to HEAD after a last word) + reg [0:0] pkt_state = PKT_ST_HEAD; + always @(posedge clk) begin + if (reset) begin + pkt_state <= PKT_ST_HEAD; + end else if (mux_valid & mux_ready) begin + pkt_state <= mux_last ? PKT_ST_HEAD : PKT_ST_BODY; + end + end + + // The switch requires the allocation to stay valid until the + // end of the packet. 
We also might need to keep the previous + // packet's allocation to compute the current one + reg [NPORTS_W-1:0] prev_sw_alloc = {NPORTS_W{1'b0}}; + reg [NPORTS_W-1:0] pkt_sw_alloc = {NPORTS_W{1'b0}}; + wire [NPORTS_W-1:0] muxi_sw_alloc = (mux_valid && pkt_state == PKT_ST_HEAD) ? + compute_mux_alloc(muxi_tvalid[n], prev_sw_alloc) : pkt_sw_alloc; + + always @(posedge clk) begin + if (reset) begin + prev_sw_alloc <= {NPORTS_W{1'b0}}; + pkt_sw_alloc <= {NPORTS_W{1'b0}}; + end else if (mux_valid & mux_ready) begin + if (pkt_state == PKT_ST_HEAD) // Hold the allocation computed on the packet's first word + pkt_sw_alloc <= muxi_sw_alloc; + if (mux_last) // Remember it as last_alloc for the next allocation + prev_sw_alloc <= muxi_sw_alloc; + end + end + + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(NPORTS), .OUT_PORTS(1), + .PIPELINE(0) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (muxi_tdata [n] ), + .s_axis_tdest ({NPORTS{1'b0}} /* Unused */ ), + .s_axis_tlast (muxi_tlast [n] ), + .s_axis_tvalid (muxi_tvalid[n] ), + .s_axis_tready (muxi_tready[n] ), + .s_axis_alloc (muxi_sw_alloc ), + .m_axis_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (m_axis_tlast [n] ), + .m_axis_tvalid (m_axis_tvalid[n] ), + .m_axis_tready (m_axis_tready[n] ) + ); + end else begin + // axi_mux has an additional bubble cycle but the logic + // to allocate an input port has fewer levels and takes + // up fewer resources. + axi_mux #( + .PRIO(MUX_ALLOC == "PRIO"), .WIDTH(CHDR_W), .SIZE(NPORTS), + .PRE_FIFO_SIZE(OPTIMIZE == "TIMING" ? 
1 : 0), .POST_FIFO_SIZE(1) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata (muxi_tdata [n] ), + .i_tlast (muxi_tlast [n] ), + .i_tvalid (muxi_tvalid [n] ), + .i_tready (muxi_tready [n] ), + .o_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .o_tlast (m_axis_tlast [n] ), + .o_tvalid (m_axis_tvalid[n] ), + .o_tready (m_axis_tready[n] ) + ); + end + end + endgenerate + + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v new file mode 100644 index 000000000..dcb11da8e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v @@ -0,0 +1,259 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_ingress_buff +// +// Description: +// +// Ingress buffer module for the CHDR crossbar. This module stores and gates +// the incoming packet and simultaneously determines the destination (TDEST) +// by inspecting the incoming TID. If the TID is CHDR_MGMT_ROUTE_EPID then we +// perform a lookup on the destination EPID to determine the correct output for TDEST.
+// +// Parameters: +// +// WIDTH : Data width of the CHDR interfaces (TDATA) +// MTU : Maximum transmission unit, in WIDTH-sized words, is 2**MTU +// DEST_W : Width of the destination routing information (TDEST) +// NODE_ID : Numeric identifier for this port +// + +module chdr_xb_ingress_buff #( + parameter WIDTH = 64, + parameter MTU = 5, + parameter DEST_W = 4, + parameter [9:0] NODE_ID = 0 +) ( + input wire clk, + input wire reset, + // CHDR input port + input wire [WIDTH-1:0] s_axis_chdr_tdata, + input wire [DEST_W-1:0] s_axis_chdr_tdest, + input wire [1:0] s_axis_chdr_tid, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // CHDR output port (with a tdest and tkeep) + output wire [WIDTH-1:0] m_axis_chdr_tdata, + output wire [DEST_W-1:0] m_axis_chdr_tdest, + output wire m_axis_chdr_tkeep, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Find port going to routing table + output wire [15:0] m_axis_find_tdata, + output wire m_axis_find_tvalid, + input wire m_axis_find_tready, + // Result port from routing table + input wire [DEST_W-1:0] s_axis_result_tdata, + input wire s_axis_result_tkeep, + input wire s_axis_result_tvalid, + output wire s_axis_result_tready +); + + // RFNoC Includes + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + + //--------------------------------------------------------------------------- + // Packet Buffer + //--------------------------------------------------------------------------- + + wire [WIDTH-1:0] gate_i_tdata , gate_o_tdata ; + wire gate_i_tlast , gate_o_tlast ; + wire gate_i_tvalid, gate_o_tvalid; + wire gate_i_tready, gate_o_tready; + + // The axi_packet_gate queues up an entire packet before letting it go out. + // This reduces congestion in the crossbar for slowly-built packets. 
+ axi_packet_gate #( + .WIDTH (WIDTH), + .SIZE (MTU) + ) axi_packet_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (gate_i_tdata), + .i_tlast (gate_i_tlast), + .i_terror (1'b0), + .i_tvalid (gate_i_tvalid), + .i_tready (gate_i_tready), + .o_tdata (gate_o_tdata), + .o_tlast (gate_o_tlast), + .o_tvalid (gate_o_tvalid), + .o_tready (gate_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Destination (TDEST) Muxing + //--------------------------------------------------------------------------- + + wire [15:0] find_tdata; + wire find_tvalid, find_tready; + + wire [DEST_W-1:0] dest_i_tdata; + wire dest_i_tkeep, dest_i_tvalid, dest_i_tready; + wire [DEST_W-1:0] dest_o_tdata; + wire dest_o_tkeep, dest_o_tvalid, dest_o_tready; + + // The find_fifo holds the lookup requests from the find_* AXI stream and + // sends them on to the m_axis_find_* stream port. It is required because the + // input logic (see below) doesn't obey the AXI handshake protocol but this + // FIFO can tolerate it. + axi_fifo #( + .WIDTH (16), + .SIZE (1) + ) find_fifo_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (find_tdata), + .i_tvalid (find_tvalid), + .i_tready (find_tready), + .o_tdata (m_axis_find_tdata), + .o_tvalid (m_axis_find_tvalid), + .o_tready (m_axis_find_tready), + .space (), + .occupied () + ); + + // The destination (TDEST) can come from two sources: Directly from the + // packet info (in which case TDEST was immediately determined and comes in + // on dest_* AXI stream) or via a lookup (in which case the result comes in + // on s_axis_result_*). Only one of these data paths is used at a time, so we + // mux them together here create a single stream (dest_o_*) that contains the + // destination for the next packet. 
+ axi_mux #( + .WIDTH (DEST_W+1), + .SIZE (2), + .PRIO (1), + .PRE_FIFO_SIZE (1), + .POST_FIFO_SIZE (1) + ) dest_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({dest_i_tkeep, dest_i_tdata, + s_axis_result_tkeep, s_axis_result_tdata}), + .i_tlast (2'b11), + .i_tvalid ({dest_i_tvalid, s_axis_result_tvalid}), + .i_tready ({dest_i_tready, s_axis_result_tready}), + .o_tdata ({dest_o_tkeep, dest_o_tdata}), + .o_tlast (), + .o_tvalid (dest_o_tvalid), + .o_tready (dest_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Input Logic + //--------------------------------------------------------------------------- + // + // When a packet comes in, we may have to do one of the following: + // 1) Lookup the TDEST using the EPID + // 2) Use the specified input TDEST + // 3) Use the NODE_ID as the TDEST (to return the packet) + // + //--------------------------------------------------------------------------- + + // The s_axis_chdr_hdr_valid signal indicates when TDATA and TID contain the + // header information for the current packet. + reg s_axis_chdr_hdr_valid = 1'b1; + + always @(posedge clk) begin + if (reset) begin + s_axis_chdr_hdr_valid <= 1'b1; + end else if (s_axis_chdr_tvalid & s_axis_chdr_tready) begin + s_axis_chdr_hdr_valid <= s_axis_chdr_tlast; + end + end + + // The dest_find_tready signal indicates if the find_fifo is ready or if the + // dest port of the dest_muax is ready, depending on which path will be used. 
+ reg dest_find_tready; + + always @(*) begin + if (s_axis_chdr_hdr_valid) begin + case (s_axis_chdr_tid) + CHDR_MGMT_ROUTE_EPID: + dest_find_tready = find_tready; + CHDR_MGMT_ROUTE_TDEST: + dest_find_tready = dest_i_tready; + CHDR_MGMT_RETURN_TO_SRC: + dest_find_tready = dest_i_tready; + default: + dest_find_tready = dest_i_tready; // We should never get here + endcase + end else begin + dest_find_tready = 1'b1; + end + end + + // We can accept a transfer from the input CHDR stream only if the packet + // gate and dest/find datapaths are ready. + assign s_axis_chdr_tready = s_axis_chdr_tvalid && + gate_i_tready && + dest_find_tready; + + // The chdr_header_stb signal indicates when we write data into the dest/find + // data path. This happens when we're accepting the header word of the packet + // into the packet gate. + wire chdr_header_stb = s_axis_chdr_tvalid && + s_axis_chdr_tready && + s_axis_chdr_hdr_valid; + + // ************************************************************************** + // WARNING: The logic below violates AXI-Stream by having a tready -> tvalid + // dependency. To ensure no deadlocks, we must place FIFOs downstream + // of gate_i_*, find_* and dest_i_* + + // Here we decide if we need to do a lookup using the find_* path or if the + // destination is known and can be put directly on the dest_* path. + // + // Start a lookup request if the TID is CHDR_MGMT_ROUTE_EPID. + assign find_tdata = chdr_get_dst_epid(s_axis_chdr_tdata[63:0]); + assign find_tvalid = chdr_header_stb && + (s_axis_chdr_tid == CHDR_MGMT_ROUTE_EPID); + // Set TDEST directly if TID is CHDR_MGMT_ROUTE_TDEST or + // CHDR_MGMT_RETURN_TO_SRC. + assign dest_i_tdata = (s_axis_chdr_tid == CHDR_MGMT_ROUTE_TDEST) ? 
+ s_axis_chdr_tdest : NODE_ID[DEST_W-1:0]; + assign dest_i_tkeep = 1'b1; + assign dest_i_tvalid = chdr_header_stb && + (s_axis_chdr_tid != CHDR_MGMT_ROUTE_EPID); + + // Input logic for axi_packet_gate + assign gate_i_tdata = s_axis_chdr_tdata; + assign gate_i_tlast = s_axis_chdr_tlast; + assign gate_i_tvalid = s_axis_chdr_tready && s_axis_chdr_tvalid; + + // + // ************************************************************************** + + + //--------------------------------------------------------------------------- + // Output Logic + //--------------------------------------------------------------------------- + // + // The destination for the packet (TDEST) must be valid before we allow the + // header of the packet to pass through. So the packet must be blocked until + // the output of the dest_o_* is valid. TDEST and TKEEP must remain valid + // until the end of the packet. + // + //--------------------------------------------------------------------------- + + assign m_axis_chdr_tdata = gate_o_tdata; + assign m_axis_chdr_tlast = gate_o_tlast; + assign m_axis_chdr_tdest = dest_o_tdata; + assign m_axis_chdr_tkeep = dest_o_tkeep; + assign m_axis_chdr_tvalid = gate_o_tvalid && dest_o_tvalid; + + assign gate_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready; + assign dest_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready && m_axis_chdr_tlast; + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v new file mode 100644 index 000000000..f445efc68 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v @@ -0,0 +1,122 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_routing_table +// Description: +// A routing table for the CHDR crossbar. This table is designed +// to be shared between all ports. 
It has an AXI-Stream lookup +// interface and a ctrlport (reduced) configuration interface. + +module chdr_xb_routing_table #( + parameter SIZE = 6, + parameter NPORTS = 4, + parameter EXT_INS_PORT_EN = 1 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Insertion Interface (for XB ports) + input wire [NPORTS-1:0] port_req_wr, + input wire [(16*NPORTS)-1:0] port_req_addr, + input wire [(32*NPORTS)-1:0] port_req_data, + output wire [NPORTS-1:0] port_resp_ack, + // Insertion Interface (External) + input wire ext_req_wr, + input wire [15:0] ext_req_addr, + input wire [31:0] ext_req_data, + output wire ext_resp_ack, + // Find Interface + input wire [(16*NPORTS)-1:0] axis_find_tdata, + input wire [NPORTS-1:0] axis_find_tvalid, + output wire [NPORTS-1:0] axis_find_tready, + // Result Interface (for Find) + output wire [($clog2(NPORTS)*NPORTS)-1:0] axis_result_tdata, + output wire [NPORTS-1:0] axis_result_tkeep, + output wire [NPORTS-1:0] axis_result_tvalid, + input wire [NPORTS-1:0] axis_result_tready +); + localparam NPORTS_W = $clog2(NPORTS); + localparam CFG_W = NPORTS_W + 16; + localparam CFG_PORTS = NPORTS + EXT_INS_PORT_EN; + + // CAM-based lookup table + + wire [15:0] insert_tdest; + wire [NPORTS_W-1:0] insert_tdata; + wire insert_tvalid; + wire insert_tready; + + axis_muxed_kv_map #( + .KEY_WIDTH(16), .VAL_WIDTH(NPORTS_W), + .SIZE(SIZE), .NUM_PORTS(NPORTS) + ) kv_map_i ( + .clk (clk ), + .reset (reset ), + .axis_insert_tdata (insert_tdata ), + .axis_insert_tdest (insert_tdest ), + .axis_insert_tvalid(insert_tvalid ), + .axis_insert_tready(insert_tready ), + .axis_find_tdata (axis_find_tdata ), + .axis_find_tvalid (axis_find_tvalid ), + .axis_find_tready (axis_find_tready ), + .axis_result_tdata (axis_result_tdata ), + .axis_result_tkeep (axis_result_tkeep ), + .axis_result_tvalid(axis_result_tvalid), + .axis_result_tready(axis_result_tready) + ); + + // Logic to convert from ctrlport to AXI-Stream + + wire ins_req_wr [0:CFG_PORTS-1]; + wire 
[15:0] ins_req_addr[0:CFG_PORTS-1]; + wire [NPORTS_W-1:0] ins_req_data[0:CFG_PORTS-1]; + wire ins_resp_ack[0:CFG_PORTS-1]; + + reg [(CFG_PORTS*CFG_W)-1:0] cfg_tdata; + reg [CFG_PORTS-1:0] cfg_tvalid = {CFG_PORTS{1'b0}}; + wire [CFG_PORTS-1:0] cfg_tready; + + genvar i; + generate for (i = 0; i < CFG_PORTS; i=i+1) begin + assign ins_req_wr [i] = (i < NPORTS) ? port_req_wr[i] : ext_req_wr; + assign ins_req_addr[i] = (i < NPORTS) ? port_req_addr[i*16 +: 16] : ext_req_addr; + assign ins_req_data[i] = (i < NPORTS) ? port_req_data[i*32 +: NPORTS_W] : ext_req_data[NPORTS_W-1:0]; + if (i < NPORTS) + assign port_resp_ack[i] = ins_resp_ack[i]; + else + assign ext_resp_ack = ins_resp_ack[i]; + + always @(posedge clk) begin + if (reset) begin + cfg_tvalid[i] <= 1'b0; + end else begin + if (~cfg_tvalid[i]) begin + if (ins_req_wr[i]) begin + cfg_tvalid[i] <= 1'b1; + cfg_tdata[(CFG_W*i) +: CFG_W] <= {ins_req_data[i], ins_req_addr[i]}; + end + end else begin + cfg_tvalid[i] <= ~cfg_tready[i]; + end + end + end + assign ins_resp_ack[i] = cfg_tvalid[i] & cfg_tready[i]; + end endgenerate + + // Multiplexer between XB ports and external cfg. TLAST is tied high with one + // bit per mux input (CFG_PORTS inputs, not CFG_W data bits), so that every + // config word ends its own packet on every input. + + axi_mux #( + .WIDTH(CFG_W), .SIZE(CFG_PORTS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) rtcfg_mux_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata(cfg_tdata), .i_tlast({CFG_PORTS{1'b1}}), + .i_tvalid(cfg_tvalid), .i_tready(cfg_tready), + .o_tdata({insert_tdata, insert_tdest}), .o_tlast(), + .o_tvalid(insert_tvalid), .o_tready(insert_tready) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile new file mode 100644 index 000000000..7fa7ae03b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile @@ -0,0 +1,52 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = 
$(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +TB_TOP_MODULE ?= crossbar_tb +SIM_TOP = $(TB_TOP_MODULE) + +SIM_SRCS = \ +$(abspath chdr_traffic_source_sim.sv) \ +$(abspath chdr_traffic_sink_sim.sv) \ +$(abspath crossbar_tb.sv) \ +$(abspath $(TB_TOP_MODULE).sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..0f1a10a6e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = axis_ctrl_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath axis_ctrl_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique 
target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv new file mode 100644 index 000000000..fa112f5cb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv @@ -0,0 +1,26 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps + +module axis_ctrl_crossbar_nxn_tb(); + crossbar_tb #( + .TEST_NAME ("axis_ctrl_crossbar_nxn_tb"), + .ROUTER_IMPL ("axis_ctrl_2d_torus" ), // Router implementation + .ROUTER_PORTS (20 ), // Number of ports + .ROUTER_DWIDTH (64 ), // Router datapath width + .MTU_LOG2 (5 ), // log2 of max packet size for router + .NUM_MASTERS (4 ), // Number of data generators in test + .TEST_MAX_PACKETS (100 ), // How many packets to stream per test case? + .TEST_LPP (20 ), // Lines per packet + .TEST_MIN_INJ_RATE (10 ), // Minimum injection rate to test + .TEST_MAX_INJ_RATE (40 ), // Maximum injection rate to test + .TEST_INJ_RATE_INCR (10 ), // Injection rate increment + .TEST_GEN_LL_FILES (0 ) // Generate files to produce load-latency graphs? 
+ ) impl ( + /* no IO */ + ); +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..399515640 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = chdr_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath chdr_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv new file mode 100644 index 000000000..1c5cace63 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv @@ -0,0 +1,26 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps + +module chdr_crossbar_nxn_tb(); + crossbar_tb #( + .TEST_NAME ("chdr_crossbar_nxn_tb"), + .ROUTER_IMPL ("chdr_crossbar_nxn" ), // Router implementation + .ROUTER_PORTS (10 ), // Number of ports + .ROUTER_DWIDTH (64 ), // Router datapath width + .MTU_LOG2 (7 ), // log2 of max packet size for router + .NUM_MASTERS (10 ), // Number of data generators in test + .TEST_MAX_PACKETS (100 ), // How many packets to stream per test case? + .TEST_LPP (100 ), // Lines per packet + .TEST_MIN_INJ_RATE (60 ), // Minimum injection rate to test + .TEST_MAX_INJ_RATE (100 ), // Maximum injection rate to test + .TEST_INJ_RATE_INCR (10 ), // Injection rate increment + .TEST_GEN_LL_FILES (0 ) // Generate files to produce load-latency graphs? + ) impl ( + /* no IO */ + ); +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv new file mode 100644 index 000000000..a9fe3ba27 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv @@ -0,0 +1,150 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_traffic_sink_sim +// Description: +// A sink for CHDR traffic. Simulation only. 
+// Accepts packets and computes the following metrics: +// - Data integrity errors +// - Packet latency +// - Throughput counts +// All metrics can optionally be written to a file to +// generate load-latency graphs. + +`timescale 1ns/1ps + +`include "sim_cvita_lib.svh" + +module chdr_traffic_sink_sim #( + parameter WIDTH = 64, + parameter MTU = 5, + parameter [15:0] NODE_ID = 'd0, + parameter [15:0] NUM_NODES = 'd16, + parameter FILE_PATH = ".", + parameter FLUSH_N = 4 +) ( + // Clocks and resets + input clk, + input rst, + // Settings + input [63:0] current_time, + input start_stb, + input [7:0] injection_rate, + input [15:0] lines_per_pkt, + input [7:0] traffic_patt, + // CHDR master interface + input [WIDTH-1:0] s_axis_tdata, + input s_axis_tlast, + input s_axis_tvalid, + output s_axis_tready, + // Metrics + output session_active, + output [31:0] xfer_count, + output [31:0] pkt_count, + output [31:0] data_err_count, + output [31:0] route_err_count +); + + // Constants + localparam integer ERR_BIT_PKT_SIZE_MISMATCH = 1; + localparam integer ERR_BIT_PKT_DATA_MISMATCH = 2; + localparam integer ERR_BIT_PKT_DEST_MISMATCH = 4; + localparam integer ERR_BIT_PKT_SEQUENCE_ERR = 8; + + cvita_slave #(.DWIDTH(WIDTH)) s_chdr (.clk(clk)); + cvita_pkt_t pkt; + + assign s_chdr.axis.tdata = s_axis_tdata; + assign s_chdr.axis.tlast = s_axis_tlast; + assign s_chdr.axis.tvalid = s_axis_tvalid; + assign s_axis_tready = s_chdr.axis.tready; + + logic running = 0; + integer num_data_errs = 0; + integer num_route_errs = 0; + logic [31:0] num_pkts_xferd = 0; + logic [31:0] num_samps_xferd = 0; + + assign data_err_count = num_data_errs; + assign route_err_count = num_route_errs; + assign xfer_count = num_samps_xferd; + assign pkt_count = num_pkts_xferd; + assign session_active = running; + + integer session = 0; + string filename; + integer handle = 0; + integer err = 0; + integer bus_idle_cnt = 0; + logic [WIDTH-1:0] i; + + // Egress buff in source is MTU + 4 + localparam integer 
IDLE_TIMEOUT = (1 << (MTU + 4 + FLUSH_N)); + + initial begin: consume_blk + // Consume infinitely + s_chdr.reset(); + while (1) begin + // A session begins on the posedge of start_stb + while (~start_stb) @(posedge clk); + session = session + 1; + $sformat(filename, "%s/pkts_node%05d_inj%03d_lpp%05d_traffic%c_sess%04d.csv", + FILE_PATH, NODE_ID, injection_rate, lines_per_pkt, traffic_patt, session); + if (FILE_PATH != "") begin + handle = $fopen(filename, "w"); + if (handle == 0) begin + $error("Could not open file: %s", filename); + $finish(); + end + end + if (handle != 0) $fdisplay(handle, "Src,Dst,Seqno,Error,Latency"); + s_chdr.reset(); + num_data_errs = 0; + num_route_errs = 0; + num_pkts_xferd = 0; + num_samps_xferd = 0; + bus_idle_cnt = 0; + running = 1; + while (1) begin + // Pull packet from bus + err = 0; + if (~s_chdr.axis.tvalid[0]) begin + @(posedge clk); + bus_idle_cnt = bus_idle_cnt + 1; + if (bus_idle_cnt <= IDLE_TIMEOUT) + continue; + else + break; + end + s_chdr.pull_pkt(pkt, 0); + bus_idle_cnt = 0; + num_pkts_xferd = num_pkts_xferd + 1; + num_samps_xferd = num_samps_xferd + lines_per_pkt; + // Validate packet + if (pkt.hdr.dst_sid != NODE_ID) begin + err = err + ERR_BIT_PKT_DEST_MISMATCH; + num_route_errs = num_route_errs + 1; + end + if (pkt.payload.size() != lines_per_pkt-2) begin + err = err + ERR_BIT_PKT_SIZE_MISMATCH; + num_data_errs = num_data_errs + 1; + end else begin + for (i = 'd0; i < (lines_per_pkt-2); i=i+1) begin + if (pkt.payload[i] != i) begin + err = err + ERR_BIT_PKT_DATA_MISMATCH; + num_data_errs = num_data_errs + 1; + break; + end + end + end + if (handle != 0) $fdisplay(handle, "%00d,%00d,%00d,%00d,%00d", + pkt.hdr.src_sid, pkt.hdr.dst_sid, pkt.hdr.seqnum, err, (current_time - pkt.hdr.timestamp)); + end + running = 0; + if (handle != 0) $fclose(handle); + end + end + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv new file mode 100644 index 000000000..8c3d974c9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv @@ -0,0 +1,202 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_traffic_source_sim +// Description: +// A traffic generator for CHDR traffic. Simulation only. +// Supports multiple traffic pattern and injection rates. +// + +`timescale 1ns/1ps + +`include "sim_cvita_lib.svh" + +module chdr_traffic_source_sim #( + parameter WIDTH = 64, // Width of the AXI-Stream data bus + parameter MTU = 5, // log2 of the max number of lines in a packet + parameter [15:0] NODE_ID = 'd0, // Node ID for this generator + parameter [15:0] NUM_NODES = 'd16 // Total number of generators in the application +) ( + // Clocks and resets + input clk, // AXI-Stream clock + input rst, // AXI-Stream reset + // Settings + input [63:0] current_time, // The current value of the global timebase (synch to clk) + input start_stb, // A strobe that indicates the start of a generation session + input [7:0] injection_rate, // The inject rate (in percent) to simulate + input [15:0] lines_per_pkt, // Number of lines per packet to generate + input [7:0] traffic_patt, // The traffic pattern (see localparams below for values) + input [31:0] num_pkts_to_send, // Number of packets to send + // CHDR master interface + output [WIDTH-1:0] m_axis_tdata, // AXI-Stream master tdata + output m_axis_tlast, // AXI-Stream master tlast + output m_axis_tvalid, // AXI-Stream master tvalid + input m_axis_tready, // AXI-Stream master tready + // Metrics + output session_active, // Signal indicating if generation session is active + output [63:0] session_duration, // Session duration (only valid after session ends) + 
output [31:0] xfer_count, // Number of lines transferred (only valid after session ends) + output [31:0] pkt_count // Number of packets transferred (only valid after session ends) +); + // **** Supported Traffic Patterns **** + localparam [7:0] TRAFFIC_PATT_LOOPBACK = 8'd76; //L + localparam [7:0] TRAFFIC_PATT_NEIGHBOR = 8'd78; //N + localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C + localparam [7:0] TRAFFIC_PATT_SEQUENTIAL = 8'd83; //S + localparam [7:0] TRAFFIC_PATT_UNIFORM = 8'd85; //U + localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O + localparam [7:0] TRAFFIC_PATT_RANDOM_PERM = 8'd82; //R + + cvita_master #(.DWIDTH(WIDTH)) m_chdr (.clk(clk)); + axis_t #(.DWIDTH(WIDTH)) post_fifo (.clk(clk)); + axis_t #(.DWIDTH(WIDTH)) pre_gate (.clk(clk)); + cvita_hdr_t header; + reg throttle = 1'b1; + + logic running = 0; + logic [31:0] curr_pkt_num = 'd0; + logic [31:0] num_samps_xferd = 'd0; + logic [63:0] start_time = 0; + logic [63:0] stop_time = 0; + logic [15:0] last_gen_sid = (NODE_ID - 16'd1); + + assign xfer_count = num_samps_xferd; + assign pkt_count = curr_pkt_num; + assign session_duration = (stop_time - start_time); + assign session_active = running; + + // Utility function to assign SIDs based on traffic pattern + function [15:0] gen_dst_sid; + input [7:0] traffic_patt; + input [15:0] last_sid; + + if (traffic_patt == TRAFFIC_PATT_UNIFORM) begin + gen_dst_sid = $urandom_range('d0, NUM_NODES-'d1); + end else if (traffic_patt == TRAFFIC_PATT_UNIFORM_OTHERS) begin + logic [31:0] rnum; + // Draw the random number in a statement rather than a declaration + // initializer: this function is not declared automatic, so an initializer + // would not be re-evaluated on each call. + rnum = $urandom_range('d0, NUM_NODES-'d2); + if (rnum < NODE_ID) + gen_dst_sid = rnum[15:0]; + else + gen_dst_sid = rnum[15:0] + 16'd1; + end else if (traffic_patt == TRAFFIC_PATT_SEQUENTIAL) begin + gen_dst_sid = (last_sid + 16'd1) % NUM_NODES; + end else if (traffic_patt == TRAFFIC_PATT_NEIGHBOR) begin + gen_dst_sid = (NODE_ID + 16'd1) % NUM_NODES; + end else if (traffic_patt == TRAFFIC_PATT_LOOPBACK) begin + gen_dst_sid = NODE_ID; + end else if (traffic_patt == 
TRAFFIC_PATT_BIT_COMPLEMENT) begin + gen_dst_sid = (NUM_NODES - NODE_ID - 1) % NUM_NODES; + end else if (traffic_patt == TRAFFIC_PATT_RANDOM_PERM) begin + //TODO: Implement me + gen_dst_sid = 0; + end else begin + gen_dst_sid = 'd0; + end + endfunction + + // Generation loop. Push to m_chdr infinitely fast + initial begin: gen_blk + // Generate infinitely + $srandom(NODE_ID + NUM_NODES); + m_chdr.reset(); + while (1) begin + // A generation session begins on the posedge of start_stb + while (~start_stb) @(posedge clk); + curr_pkt_num = 'd0; + m_chdr.reset(); + num_samps_xferd = 'd0; + start_time = current_time; + running = 1; + while (curr_pkt_num < num_pkts_to_send) begin + header = '{ + pkt_type:DATA, has_time:1, eob:0, + seqnum:curr_pkt_num[11:0], length:(lines_per_pkt*8), + src_sid:NODE_ID, dst_sid:gen_dst_sid(traffic_patt, last_gen_sid), + timestamp:0 //TS attached later + }; + last_gen_sid = header.dst_sid; + curr_pkt_num = curr_pkt_num + 'd1; + m_chdr.push_ramp_pkt(lines_per_pkt-2, 'h0, 'h1, header); + num_samps_xferd = num_samps_xferd + lines_per_pkt; + end + running = 0; + stop_time = current_time; + end + end + + // Capture packets in a really short FIFO (for backpressure) + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(MTU + 1) + ) fifo_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({m_chdr.axis.tlast, m_chdr.axis.tdata}), + .i_tvalid (m_chdr.axis.tvalid), + .i_tready (m_chdr.axis.tready), + .o_tdata ({post_fifo.tlast, post_fifo.tdata}), + .o_tvalid (post_fifo.tvalid), + .o_tready (post_fifo.tready), + .space (), + .occupied () + ); + + // Attach timestamp after the packet leaves the FIFO after + // throttling. 
+ + localparam [1:0] ST_HDR = 2'd0; + localparam [1:0] ST_TS = 2'd1; + localparam [1:0] ST_BODY = 2'd2; + + reg [1:0] pkt_state = ST_HDR; + always_ff @(posedge clk) begin + if (rst) begin + pkt_state <= ST_HDR; + end else if (pre_gate.tvalid & pre_gate.tready) begin + case (pkt_state) + ST_HDR: + if (~pre_gate.tlast) + pkt_state <= pre_gate.tdata[61] ? ST_TS : ST_BODY; + ST_TS: + pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY; + ST_BODY: + pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY; + default: + pkt_state <= ST_HDR; + endcase + end + end + + // Enforce injection rate by pulling from FIFO with a certain time probability + always_ff @(posedge clk) begin + throttle <= ($urandom_range(32'd99, 32'd0) > {24'h0, injection_rate}); + end + + // Insert timestamp + throttle logic + assign pre_gate.tdata = (pkt_state == ST_TS) ? current_time : post_fifo.tdata; + assign pre_gate.tlast = post_fifo.tlast; + assign pre_gate.tvalid = post_fifo.tvalid & ~throttle; + assign post_fifo.tready = pre_gate.tready & ~throttle; + + // Gate the packet to smooth out throttle-related noise. + // This also serves as a buffer for the packet in case things are backed up + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(MTU + 4), .USE_AS_BUFF(1) + ) pkt_gate_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata (pre_gate.tdata), + .i_tlast (pre_gate.tlast), + .i_terror (1'b0), + .i_tvalid (pre_gate.tvalid), + .i_tready (pre_gate.tready), + .o_tdata (m_axis_tdata), + .o_tlast (m_axis_tlast), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready) + ); + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv new file mode 100644 index 000000000..fc9d53fe7 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv @@ -0,0 +1,428 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps +`define NS_PER_TICK 1 +`define NUM_TEST_CASES 7 + +`include "sim_clks_rsts.vh" +`include "sim_exec_report.vh" +`include "sim_set_rb_lib.svh" +`include "sim_axis_lib.svh" + +`define SIM_TIMEOUT_US 1000000 // Default: 1s + +module crossbar_tb #( + parameter TEST_NAME = "crossbar_tb", + // Router parameters + parameter ROUTER_IMPL = "axi_crossbar", // Router implementation + parameter ROUTER_PORTS = 10, // # Router ports + parameter ROUTER_DWIDTH = 64, // Router datapath width + parameter MTU_LOG2 = 7, // log2 of max packet size for router + parameter NUM_MASTERS = ROUTER_PORTS, // Number of data generators in test + // Test parameters + parameter TEST_MAX_PACKETS = 50, // How many packets to stream per test case? + parameter TEST_LPP = 50, // Lines per packet + parameter TEST_MIN_INJ_RATE = 60, // Minimum injection rate to test + parameter TEST_MAX_INJ_RATE = 100, // Maximum injection rate to test + parameter TEST_INJ_RATE_INCR = 10, // Injection rate increment + parameter TEST_GEN_LL_FILES = 0 // Generate files to produce load-latency graphs? 
+ +)( + /* no IO */ +); + `TEST_BENCH_INIT(TEST_NAME,`NUM_TEST_CASES,`NS_PER_TICK) + + //---------------------------------------------------- + // General test setup + //---------------------------------------------------- + + // Clocks and reset + `DEFINE_CLK(clk, 5.000, 50) + `DEFINE_RESET(rst, 0, 10) + + // Timekeeper (cycle counter) + logic [63:0] timestamp; + initial begin : timekeeper_blk + while (rst) @(posedge clk); + timestamp = 'd0; + while (~rst) begin + @(posedge clk); + timestamp = timestamp + 'd1; + end + end + + //---------------------------------------------------- + // Instantiate traffic generators, checkers, buses + //---------------------------------------------------- + localparam FILE_PATH = {`WORKING_DIR, "/data/", ROUTER_IMPL}; + + // Data buses + axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) src2rtr_axis (.clk(clk)); + axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) rtr2snk_axis (.clk(clk)); + + // Control buses + settings_bus_master #(.SR_AWIDTH(16), .SR_DWIDTH(32)) rtr_sb (.clk(clk)); + wire rtr_sb_ack; + + // Test vector source and sink instantiation + logic [7:0] set_injection_rate; + logic [15:0] set_lines_per_pkt; + logic [7:0] set_traffic_patt; + logic [31:0] set_num_pkts_to_send; + logic snk_start_stb = 0; + logic src_start_stb = 0; + + wire [63:0] session_duration [0:ROUTER_PORTS-1]; + wire [ROUTER_PORTS-1:0] src_active; + wire [31:0] src_xfer_count [0:ROUTER_PORTS-1]; + wire [31:0] src_pkt_count [0:ROUTER_PORTS-1]; + wire [ROUTER_PORTS-1:0] snk_active; + wire [31:0] snk_xfer_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_pkt_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_data_err_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_route_err_count[0:ROUTER_PORTS-1]; + + wire deadlock_detected; + reg deadlock_detected_del = 1'b0; + always @(posedge clk) deadlock_detected_del <= deadlock_detected; + wire deadlock_re = (deadlock_detected & ~deadlock_detected_del); + wire deadlock_fe = (~deadlock_detected & 
deadlock_detected_del); + + genvar i; + generate for (i = 0; i < ROUTER_PORTS; i=i+1) begin: src_snk_blk + chdr_traffic_source_sim #( + .WIDTH (ROUTER_DWIDTH), + .MTU (MTU_LOG2), + .NODE_ID (i), + .NUM_NODES (ROUTER_PORTS) + ) traffic_src ( + .clk (clk), + .rst (rst), + .current_time (timestamp), + .start_stb (src_start_stb & (i < NUM_MASTERS)), + .injection_rate (set_injection_rate), + .lines_per_pkt (set_lines_per_pkt), + .traffic_patt (set_traffic_patt), + .num_pkts_to_send (set_num_pkts_to_send), + .m_axis_tdata (src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]), + .m_axis_tlast (src2rtr_axis.tlast[i]), + .m_axis_tvalid (src2rtr_axis.tvalid[i]), + .m_axis_tready (src2rtr_axis.tready[i]), + .session_active (src_active[i]), + .session_duration (session_duration[i]), + .xfer_count (src_xfer_count[i]), + .pkt_count (src_pkt_count[i]) + ); + + chdr_traffic_sink_sim #( + .WIDTH (ROUTER_DWIDTH), + .MTU (MTU_LOG2), + .NODE_ID (i), + .NUM_NODES (ROUTER_PORTS), + .FILE_PATH (TEST_GEN_LL_FILES==1 ? 
FILE_PATH : "") + ) traffic_sink ( + .clk (clk), + .rst (rst), + .current_time (timestamp), + .start_stb (snk_start_stb), + .injection_rate (set_injection_rate), + .lines_per_pkt (set_lines_per_pkt), + .traffic_patt (set_traffic_patt), + .s_axis_tdata (rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]), + .s_axis_tlast (rtr2snk_axis.tlast[i]), + .s_axis_tvalid (rtr2snk_axis.tvalid[i]), + .s_axis_tready (rtr2snk_axis.tready[i]), + .session_active (snk_active[i]), + .xfer_count (snk_xfer_count[i]), + .pkt_count (snk_pkt_count[i]), + .data_err_count (snk_data_err_count[i]), + .route_err_count (snk_route_err_count[i]) + ); + end endgenerate + + //---------------------------------------------------- + // Instantiate DUT + //---------------------------------------------------- + generate if (ROUTER_IMPL == "FIFO") begin + for (i = 0; i < ROUTER_PORTS; i=i+1) begin + axi_fifo #( + .WIDTH(ROUTER_DWIDTH+1), .SIZE(0) + ) fifo_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({src2rtr_axis.tlast[i], src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}), + .i_tvalid (src2rtr_axis.tvalid[i]), + .i_tready (src2rtr_axis.tready[i]), + .o_tdata ({rtr2snk_axis.tlast[i], rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}), + .o_tvalid (rtr2snk_axis.tvalid[i]), + .o_tready (rtr2snk_axis.tready[i]), + .space (), + .occupied () + ); + end + end else if (ROUTER_IMPL == "axi_crossbar") begin + axi_crossbar #( + .BASE (0), + .FIFO_WIDTH (ROUTER_DWIDTH), + .DST_WIDTH (16), + .NUM_INPUTS (ROUTER_PORTS), + .NUM_OUTPUTS (ROUTER_PORTS) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + .clear (1'b0), + .local_addr (8'd0), + // Inputs + .i_tdata (src2rtr_axis.tdata), + .i_tlast (src2rtr_axis.tlast), + .i_tvalid (src2rtr_axis.tvalid), + .i_tready (src2rtr_axis.tready), + .pkt_present (src2rtr_axis.tvalid), + // Output + .o_tdata (rtr2snk_axis.tdata), + .o_tlast (rtr2snk_axis.tlast), + .o_tvalid (rtr2snk_axis.tvalid), + .o_tready 
(rtr2snk_axis.tready), + // Setting Bus + .set_stb (rtr_sb.settings_bus.set_stb), + .set_addr (rtr_sb.settings_bus.set_addr), + .set_data (rtr_sb.settings_bus.set_data), + // Readback bus + .rb_rd_stb (1'b0), + .rb_addr ({(2*$clog2(ROUTER_PORTS)){1'b0}}), + .rb_data () + ); + end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin + chdr_crossbar_nxn #( + .CHDR_W (ROUTER_DWIDTH), + .NPORTS (ROUTER_PORTS), + .DEFAULT_PORT (0), + .MTU (MTU_LOG2), + .ROUTE_TBL_SIZE (6), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE ("AREA"), + .NPORTS_MGMT (0), + .EXT_RTCFG_PORT (1) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (src2rtr_axis.tdata), + .s_axis_tlast (src2rtr_axis.tlast), + .s_axis_tvalid (src2rtr_axis.tvalid), + .s_axis_tready (src2rtr_axis.tready), + // Output + .m_axis_tdata (rtr2snk_axis.tdata), + .m_axis_tlast (rtr2snk_axis.tlast), + .m_axis_tvalid (rtr2snk_axis.tvalid), + .m_axis_tready (rtr2snk_axis.tready), + // External router config + .ext_rtcfg_stb (rtr_sb.settings_bus.set_stb), + .ext_rtcfg_addr (rtr_sb.settings_bus.set_addr), + .ext_rtcfg_data (rtr_sb.settings_bus.set_data), + .ext_rtcfg_ack (rtr_sb_ack) + ); + end else begin + axis_ctrl_crossbar_nxn #( + .WIDTH (ROUTER_DWIDTH), + .NPORTS (ROUTER_PORTS), + .TOPOLOGY (ROUTER_IMPL == "axis_ctrl_2d_torus" ? 
"TORUS" : "MESH"), + .INGRESS_BUFF_SIZE(MTU_LOG2), + .ROUTER_BUFF_SIZE (MTU_LOG2), + .ROUTING_ALLOC ("WORMHOLE"), + .SWITCH_ALLOC ("PRIO") + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (src2rtr_axis.tdata), + .s_axis_tlast (src2rtr_axis.tlast), + .s_axis_tvalid (src2rtr_axis.tvalid), + .s_axis_tready (src2rtr_axis.tready), + // Output + .m_axis_tdata (rtr2snk_axis.tdata), + .m_axis_tlast (rtr2snk_axis.tlast), + .m_axis_tvalid (rtr2snk_axis.tvalid), + .m_axis_tready (rtr2snk_axis.tready), + // Deadlock detection + .deadlock_detected(deadlock_detected) + ); + end endgenerate + + //---------------------------------------------------- + // Test routine. Runs tests and writes metrics to file + //---------------------------------------------------- + + // Constants + localparam [7:0] TRAFFIC_PATT_LOOPBACK = 8'd76; //L + localparam [7:0] TRAFFIC_PATT_NEIGHBOR = 8'd78; //N + localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C + localparam [7:0] TRAFFIC_PATT_SEQUENTIAL = 8'd83; //S + localparam [7:0] TRAFFIC_PATT_UNIFORM = 8'd85; //U + localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O + localparam [7:0] TRAFFIC_PATT_RANDOM_PERM = 8'd82; //R + + string filename; + integer node; + integer session = 0; + integer handle = 0; + logic [63:0] start_time; + integer total_pkts_recvd = 0, total_pkts_sent = 0; + + task sim_dataflow; + input [7:0] injection_rate; + input [7:0] traffic_patt; + input [15:0] lines_per_pkt; + input [31:0] num_pkts_to_send; + begin + session = session + 1; + $display("--------------- New Simulation ---------------"); + $display("- Module = %s", ROUTER_IMPL); + $display("- Nodes = %00d", ROUTER_PORTS); + $display("- Injection Rate = %00d%%", injection_rate); + $display("- Traffic Pattern = %c", traffic_patt); + $display("- Packet Size = %00d words (%00d bits)", lines_per_pkt, ROUTER_DWIDTH); + $display("- Max Packets = %00d", num_pkts_to_send); + // Configure settings + @(posedge clk); + 
set_injection_rate = injection_rate; + set_lines_per_pkt = lines_per_pkt; + set_traffic_patt = traffic_patt; + set_num_pkts_to_send = num_pkts_to_send; + @(posedge clk); + // Start the sink then the source + $display("Data flow starting..."); + snk_start_stb = 1; + src_start_stb = 1; + @(posedge clk); + src_start_stb = 0; + snk_start_stb = 0; + @(posedge clk); + start_time = timestamp; + // Wait for source blocks to finish generating + $display("Waiting for packets to transmit... (may take a while)"); + while (|src_active) begin + @(posedge clk); + if (deadlock_re) $display("WARNING: Deadlock detected"); + if (deadlock_fe) $display("Recovered from deadlock"); + end + // Wait for sink blocks to finish consuming + $display("All packets transmitted. Waiting to flush..."); + while (|snk_active) @(posedge clk); + // If router deadlocks then wait for it to recover + if (deadlock_detected) begin + $display("Waiting for deadlock recovery to finish..."); + while (deadlock_detected) @(posedge clk); + end + repeat(set_lines_per_pkt) @(posedge clk); + // Record summary to file and print to console + $sformat(filename, "%s/info_inj%03d_lpp%05d_traffic%c_sess%04d.csv", + FILE_PATH, injection_rate, lines_per_pkt, traffic_patt, session); + if (TEST_GEN_LL_FILES == 1) begin + handle = $fopen(filename, "w"); + if (handle == 0) begin + $error("Could not open file: %s", filename); + $finish(); + end + end + if (handle != 0) $fdisplay(handle, "Impl,Node,TxPkts,RxPkts,Duration,ErrRoute,ErrData"); + total_pkts_sent = 0; + total_pkts_recvd = 0; + for (node = 0; node < ROUTER_PORTS; node=node+1) begin + $display("- Node #%03d: TX = %5d pkts, RX = %5d pkts, Inj Rate = %3d%%. 
Errs = %5d route, %5d data", + node,src_pkt_count[node], snk_pkt_count[node], ((src_xfer_count[node]*100)/session_duration[node]), + snk_route_err_count[node], snk_data_err_count[node]); + if (handle != 0) $fdisplay(handle, "%s,%00d,%00d,%00d,%00d,%00d,%00d", ROUTER_IMPL, + node,src_pkt_count[node], snk_pkt_count[node], session_duration[node], + snk_route_err_count[node], snk_data_err_count[node]); + total_pkts_sent = total_pkts_sent + src_pkt_count[node]; + total_pkts_recvd = total_pkts_recvd + snk_pkt_count[node]; + `ASSERT_ERROR(snk_route_err_count[node] == 0, "Routing errors. Received packets destined to other nodes"); + `ASSERT_ERROR(snk_data_err_count[node] == 0, "Integrity errors. Received corrupted packets"); + end + $display("Finished. Elapsed = %00d cycles, TX = %00d pkts, RX = %00d pkts", + (timestamp - start_time), total_pkts_sent, total_pkts_recvd); + `ASSERT_ERROR(total_pkts_recvd == total_pkts_sent, "Total # TX packets did not match the total # RX packets"); + if (handle != 0) $fclose(handle); + $display("----------------------------------------------"); + end + endtask + + //---------------------------------------------------- + // Main test loop + //---------------------------------------------------- + + logic [31:0] MAX_PACKETS = TEST_MAX_PACKETS; + logic [15:0] LPP = TEST_LPP; + integer MIN_INJ_RATE = TEST_MIN_INJ_RATE; + integer MAX_INJ_RATE = TEST_MAX_INJ_RATE; + integer INJ_RATE_INCR = TEST_INJ_RATE_INCR; + + integer inj_rate = 0; + initial begin : tb_main + src_start_stb = 0; + snk_start_stb = 0; + rtr_sb.reset(); + while (rst) @(posedge clk); + + repeat (10) @(posedge clk); + + `TEST_CASE_START("Set up crossbar"); + for (node = 0; node < ROUTER_PORTS; node=node+1) begin + if (ROUTER_IMPL == "axi_crossbar") begin + rtr_sb.write(16'd256 + node[15:0], {16'h0, node[15:0]}); + end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin + rtr_sb.write(node[15:0], {16'h0, node[15:0]}); + while (~rtr_sb_ack) @(posedge clk); + end + end + 
`TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate LOOPBACK Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_LOOPBACK, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate SEQUENTIAL Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_SEQUENTIAL, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate UNIFORM Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate UNIFORM_OTHERS Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM_OTHERS, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate BIT_COMPLEMENT Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_BIT_COMPLEMENT, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_CASE_START("Simulate NEIGHBOR Traffic Pattern"); + for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin + sim_dataflow(inj_rate, TRAFFIC_PATT_NEIGHBOR, LPP, MAX_PACKETS); + end + `TEST_CASE_DONE(1) + + `TEST_BENCH_DONE + end // initial begin + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py new file mode 100755 index 000000000..35821c2c4 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +# +# Copyright 2018 Ettus Research, A 
National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# +# Description +# Parses the output files generated by crossbar_tb and outputs +# a load-latency graph and a expected-actual throughput graph + +import os, sys +import argparse +import time +import glob +import csv +import re +import numpy as np + +import matplotlib +#matplotlib.use('Agg') +import matplotlib.pyplot as plt + +def get_options(): + parser = argparse.ArgumentParser(description='Generate Load Latency Graphs') + parser.add_argument('datadir', type=str, default='.', help='Location of packet capture files generated by testbench') + return parser.parse_args() + +TRAFFIC_PATTERNS = {'U':'UNIFORM', 'O':'UNIFORM_OTHERS', 'N':'NEIGHBOR', 'L':'LOOPBACK', 'S':'SEQUENTIAL', 'C':'BIT_COMPLEMENT', 'R':'RANDOM_PERM'} + +class InfoFile(): + def __init__(self, filename): + # Extract test info from filename + m = re.search(r".*/info_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", filename) + if m is None: + raise ValueError('Incorrect filename format: %s'%(filename)) + self.inj_rate = int(m.group(1)) + self.lpp = int(m.group(2)) + self.traffic_patt = TRAFFIC_PATTERNS[m.group(3)] + self.session = int(m.group(4)) + + self.tx_pkts = 0 + self.rx_pkts = 0 + self.duration = 0 + self.errs = 0 + self.nodes = 0 + with open(filename, 'r') as csvfile: + reader = csv.reader(csvfile, delimiter=',') + isheader = True + for row in reader: + if isheader: + isheader = False + if row != ['Impl', 'Node', 'TxPkts', 'RxPkts', 'Duration', 'ErrRoute', 'ErrData']: + raise ValueError('Incorrect header: %s'%(filename)) + else: + self.impl = row[0] + self.tx_pkts = self.tx_pkts + int(row[2]) + self.rx_pkts = self.tx_pkts + int(row[3]) + self.duration = self.duration + int(row[4]) + self.errs = self.errs + int(row[5]) + int(row[6]) + self.nodes = self.nodes + 1 + self.real_inj_rate = (100.0 * self.tx_pkts * self.lpp) / self.duration + +class PktFile(): + def __init__(self, filename): + # Extract test info from 
filename + m = re.search(r".*/pkts_node([0-9]+)_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", filename) + if m is None: + raise ValueError('Incorrect filename format: %s'%(filename)) + self.node = int(m.group(1)) + self.inj_rate = int(m.group(2)) + self.lpp = int(m.group(3)) + self.traffic_patt = TRAFFIC_PATTERNS[m.group(4)] + self.session = int(m.group(5)) + + self.latencies = [] + with open(filename, 'r') as csvfile: + reader = csv.reader(csvfile, delimiter=',') + isheader = True + for row in reader: + if isheader: + isheader = False + if row != ['Src', 'Dst', 'Seqno', 'Error', 'Latency']: + raise ValueError('Incorrect header: %s'%(filename)) + else: + self.latencies.append(int(row[4])) + + +######################################################################## +# main +######################################################################## +if __name__=='__main__': + options = get_options() + + if (not os.path.isdir(options.datadir)): + print('ERROR: Data director %s does not exist'%(options.datadir)) + sys.exit(1) + + info_db = dict() + info_files = glob.glob(os.path.join(options.datadir, 'info*.csv')) + router_impl = '' + lines_per_pkt = 0 + for ifile in info_files: + print('INFO: Reading %s...'%(ifile)) + tmp = InfoFile(ifile) + router_impl = tmp.impl # Assume that all files have the same impl + lines_per_pkt = tmp.lpp # Assume that all files have the same LPP + info_db[(tmp.lpp, tmp.traffic_patt, tmp.inj_rate)] = tmp + + pkt_db = dict() + pkts_files = glob.glob(os.path.join(options.datadir, 'pkts*.csv')) + for pfile in pkts_files: + print('INFO: Reading %s...'%(pfile)) + tmp = PktFile(pfile) + config_key = (tmp.lpp, tmp.traffic_patt) + if config_key not in pkt_db: + pkt_db[config_key] = dict() + if tmp.inj_rate not in pkt_db[config_key]: + pkt_db[config_key][tmp.inj_rate] = [] + + + pkt_db[config_key][tmp.inj_rate].extend(tmp.latencies) + + # Write load-latency plots to file + actual_inj_rate_db = dict() + for config in sorted(pkt_db): + (lpp, 
traffic_patt) = config + ll_file = 'load-latency_%s_traffic-%s_lpp-%d.png'%(router_impl, traffic_patt, lpp) + print('INFO: Writing file ' + ll_file + '...') + percentile = [0, 25, 50, 75, 90, 95, 99, 99.9, 100] + plt.figure() + plt.title('Load Latency Graph for %s\n(Traffic: %s, LPP: %d)'%(router_impl, traffic_patt, lpp)) + for p in percentile: + plot_data = dict() + for inj_rate in pkt_db[config]: + real_inj_rate = info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate + plot_data[real_inj_rate] = np.percentile(pkt_db[config][inj_rate], p) + latencies = [] + rates = [] + for inj_rate in sorted(plot_data): + rates.append(inj_rate) + latencies.append(plot_data[inj_rate]) + plt.plot(rates, latencies, label='$P_{%.1f}$'%(p)) + plt.xlabel('Load (%)') + plt.xticks(range(0, 110, 10)) + plt.ylabel('Latency (cycles)') + plt.grid(True) + plt.legend() + plt.savefig(os.path.join(options.datadir, ll_file), dpi=120) + # Generate actual inj_rate graph + real_inj_rates = [] + for inj_rate in sorted(pkt_db[config]): + real_inj_rates.append(info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate) + actual_inj_rate_db[config] = (sorted(pkt_db[config]), real_inj_rates) + + # Write offered vs actual injection rate plots to file + injrate_file = 'injection-rate_%s_lpp-%d.png'%(router_impl, lines_per_pkt) + print('INFO: Writing file ' + injrate_file + '...') + plt.figure() + plt.title('Max Injection Rate Graph for %s'%(router_impl)) + for config in actual_inj_rate_db: + (x, y) = actual_inj_rate_db[config] + plt.plot(x, y, label=str(config)) + plt.xlabel('Offered Injection Rate (%)') + plt.xticks(range(0, 110, 10)) + plt.ylabel('Accepted Injection Rate (%)') + plt.yticks(range(0, 110, 10)) + plt.grid(True) + plt.legend() + plt.savefig(os.path.join(options.datadir, injrate_file), dpi=120)
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py new file mode 100755 index 000000000..8e546fef9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# +# Description +# Run the crossbar testbench (crossbar_tb) for various parameter +# configurations and generate load-latency graphs for each run. + +import argparse +import math +import os, sys +import shutil +import glob +import subprocess + +# Source of the auto-generated top-level testbench. The {placeholders} are +# filled in with str.format() before the text is written to crossbar_tb_auto.sv +g_tb_top_template = """ +`timescale 1ns/1ps +module crossbar_tb_auto(); + crossbar_tb #( + .TEST_NAME ("crossbar_tb_auto"), + .ROUTER_IMPL ("{rtr_impl}"), + .ROUTER_PORTS ({rtr_ports}), + .ROUTER_DWIDTH ({rtr_width}), + .MTU_LOG2 ({rtr_mtu}), + .NUM_MASTERS ({rtr_sources}), + .TEST_MAX_PACKETS ({tst_maxpkts}), + .TEST_LPP ({tst_lpp}), + .TEST_MIN_INJ_RATE ({tst_injrate_min}), + .TEST_MAX_INJ_RATE ({tst_injrate_max}), + .TEST_INJ_RATE_INCR (10), + .TEST_GEN_LL_FILES (1) + ) impl ( + /* no IO */ + ); +endmodule +""" + +# Template parameter sets, one per crossbar class ('data' or 'ctrl') +g_test_params = { + 'data': {'rtr_width':64, 'rtr_mtu':7, 'tst_maxpkts':100, 'tst_lpp':100, 'tst_injrate_min':30, 'tst_injrate_max':100}, + 'ctrl': {'rtr_width':64, 'rtr_mtu':5, 'tst_maxpkts':100, 'tst_lpp':20, 'tst_injrate_min':10, 'tst_injrate_max':50}, +} + +# Maps each supported crossbar implementation to its key in g_test_params +g_xb_types = { + 'chdr_crossbar_nxn':'data', 'axi_crossbar':'data', + 'axis_ctrl_2d_torus':'ctrl', 'axis_ctrl_2d_mesh':'ctrl' +} + +# Parse command line options; --impl, --ports and --sources are kept as strings +def get_options(): + parser = argparse.ArgumentParser(description='Run correctness sim and generate load-latency plots') + parser.add_argument('--impl', type=str, default='chdr_crossbar_nxn', help='Implementation (CSV) [%s]'%(','.join(g_xb_types.keys()))) + parser.add_argument('--ports', type=str, default='16', help='Number of ports (CSV)') + parser.add_argument('--sources', type=str, 
default='16', help='Number of active data sources (masters)') + return parser.parse_args() + +def launch_run(impl, ports, sources): + run_name = '%s_ports%d_srcs%d'%(impl, ports, sources) + # Prepare a transform map to autogenerate a TB file + transform = {'rtr_impl':impl, 'rtr_ports':ports, 'rtr_sources':sources} + for k,v in g_test_params[g_xb_types[impl]].items(): + transform[k] = v + # Create crossbar_tb_auto.sv with specified parameters + with open('crossbar_tb_auto.sv', 'w') as out_file: + out_file.write(g_tb_top_template.format(**transform)) + # Create data directory for the simulation + data_dir = os.path.join('data', impl) + export_dir = os.path.join('data', run_name) + try: + os.makedirs('data') + except FileExistsError: + pass + os.makedirs(data_dir) + os.makedirs(export_dir) + # Run "make xsim" + exitcode = subprocess.Popen('make xsim TB_TOP_MODULE=crossbar_tb_auto', shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "make xsim". Was setupenv.sh run?') + # Generate load-latency graphs + exitcode = subprocess.Popen('gen_load_latency_graph.py ' + data_dir, shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "gen_load_latency_graph.py"') + # Copy files + os.rename('xsim.log', os.path.join(export_dir, 'xsim.log')) + for file in glob.glob(os.path.join(data_dir, '*.png')): + shutil.copy(file, export_dir) + # Cleanup outputs + subprocess.Popen('make cleanall', shell=True).wait() + try: + os.remove('crossbar_tb_auto.sv') + except FileNotFoundError: + pass + try: + shutil.rmtree(data_dir) + except OSError: + print('WARNING: Could not delete ' + data_dir) + +def main(): + args = get_options(); + for impl in args.impl.strip().split(','): + for ports in args.ports.strip().split(','): + for sources in args.sources.strip().split(','): + launch_run(impl, int(ports), min(int(ports), int(sources))) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py 
b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py new file mode 100755 index 000000000..a2eaf71fb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py @@ -0,0 +1,125 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import math +import sys +import datetime +import random + +# Parse command line options +# ------------------------------------------------ +def get_options(): + parser = argparse.ArgumentParser(description='Generate a node to coordinate mapping file.') + parser.add_argument('--pattern', type=str, default='xy', choices=['xy', 'yx', 'spiral', 'random'], help='Node distribution pattern') + parser.add_argument('--dimsize', type=int, default=4, help='Maximum dimension size') + parser.add_argument('--seed', type=int, default=None, help='Seed for random permutation generator') + return parser.parse_args() + +# Pattern Generators +# ------------------------------------------------ +# Each generator returns a dict {node number: (x, y)} for an N x N grid. + +# Row-major numbering: node (y*N + x) is placed at (x, y) +def gen_xy(N): + nodes = dict() + for y in range(N): + for x in range(N): + nodes[(y*N + x)] = (x, y) + return nodes + +# Column-major numbering: node (x*N + y) is placed at (x, y) +def gen_yx(N): + nodes = dict() + for y in range(N): + for x in range(N): + nodes[(x*N + y)] = (x, y) + return nodes + +# Numbers the nodes along a square spiral. The walk starts at (0, 0) and the +# coordinates are shifted by ceil(N/2)-1 before they are stored. +def gen_spiral(N): + nodes = dict() + x = y = 0 + dx = 0 + dy = -1 + for i in range(N**2): + if (-N/2 < x <= N/2) and (-N/2 < y <= N/2): + nodes[i] = (x + int(math.ceil(N/2)) - 1, y + int(math.ceil(N/2)) - 1) + # Turn by 90 degrees whenever a corner of the spiral is reached + if x == y or (x < 0 and x == -y) or (x > 0 and x == 1-y): + dx, dy = -dy, dx + x, y = x+dx, y+dy + return nodes + +# Places a random permutation of the node numbers 0..N*N-1 on the grid. +# Uses the global 'random' state, so seed it first for repeatable output. +def gen_random(N): + nodes = dict() + rnodes = random.sample(range(N*N), N*N) + for y in range(N): + for x in range(N): + nodes[rnodes[x*N + y]] = (x, y) + return nodes + +# Source Generators +# ------------------------------------------------ +# Returns one text line per grid row; each line lists the node numbers +# located at (0, y) .. (N-1, y), right-aligned in 5-character columns. +def layout_nodes(nodes): + N = int(math.sqrt(len(nodes))) + #inv_nodes = {v: k for k, v in nodes.iteritems()} + coords = {v: k for k, v in 
nodes.items()} + lines = [] + for y in range(N): + line = '' + for x in range(N): + line += '%5d'%(coords[(x,y)]) + lines.append(line) + return lines + +# Returns the Verilog source lines for one grid size N: the node layout as a +# comment block followed by the XCOORD_DIM_<N> and YCOORD_DIM_<N> localparams. +# The coordinates are concatenated from the highest node number down, which +# puts the entry of node 0 in the least significant bits. +def gen_vparams(nodes, N, pattern): + src_lines = [ '\n// DIM_SIZE = %d, PATTERN = %s'%(N,pattern.upper()), '//------------------------------------' ] + for l in layout_nodes(nodes): + src_lines.append('// ' + l) + # Number of bits of a single coordinate + bitw = math.ceil(math.log2(N)) + xvals = ','.join(['%d\'d%d'%(bitw,v[0]) for k, v in sorted(nodes.items(), reverse=True)]) + yvals = ','.join(['%d\'d%d'%(bitw,v[1]) for k, v in sorted(nodes.items(), reverse=True)]) + xpar = 'localparam [%d:0] XCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, xvals) + ypar = 'localparam [%d:0] YCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, yvals) + src_lines.append(xpar) + src_lines.append(ypar) + src_lines.append('') + return src_lines + +# Returns the Verilog source lines of the function node_to_<dim>dst. The +# function has one branch per DIM_SIZE in 2..N, each reading the matching +# <DIM>COORD_DIM_<size> table at the index given by the low header bits, +# and returns zero for any other DIM_SIZE. +def gen_lookup_func(dim, N): + src_lines = [ 'function [CLOG2_DIM_SIZE-1:0] node_to_%sdst;'%(dim), ' input [WIDTH-1:0] header;', 'begin'] + dim_sizes = range(2, N+1) + for i in dim_sizes: + node_bitw = math.ceil(math.log2(i*i)) + dim_bitw = math.ceil(math.log2(i)) + prefix = ' ' if (i == dim_sizes[0]) else ' else ' + src_lines.append(prefix + 'if (DIM_SIZE == %d)'%(i)) + src_lines.append(' node_to_%sdst = %sCOORD_DIM_%03d[%d*header[%d:0] +: %d];'%(dim,dim.upper(),i,dim_bitw,node_bitw-1,dim_bitw)) + src_lines.append(' else') + src_lines.append(' node_to_%sdst = {CLOG2_DIM_SIZE{1\'d0}};'%(dim)) + src_lines.append('end endfunction\n\n') + return src_lines + +# Writes the generated Verilog header to 'filename' (the file is overwritten) +def gen_vheader(dimsize, mapgen, pattern, filename): + with open(filename, 'w') as vhfile: + vhfile.write('// Copyright %s Ettus Research, A National Instruments Company\n'%(datetime.datetime.now().year)) + vhfile.write('// SPDX-License-Identifier: LGPL-3.0-or-later\n') + vhfile.write('//\n') + vhfile.write('// Autogenerated file. 
Do not modify.\n') + vhfile.write('// $ %s\n'%(' '.join(sys.argv[:]))) + vhfile.write('\nparameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround\n\n') + for i in range(2, dimsize+1): + nodes = mapgen(i) + N = math.sqrt(len(nodes)) + vhfile.write('\n'.join(gen_vparams(nodes, N, pattern))) + vhfile.write('\n\n') + vhfile.write('\n'.join(gen_lookup_func('x', dimsize))) + vhfile.write('\n'.join(gen_lookup_func('y', dimsize))) + +def main(): + args = get_options(); + random.seed(args.seed) + generators = {'xy': gen_xy, 'yx': gen_yx, 'spiral':gen_spiral, 'random': gen_random} + gen_vheader(args.dimsize, generators[args.pattern], args.pattern, 'mesh_node_mapping.vh') + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v new file mode 100644 index 000000000..e0338347b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v @@ -0,0 +1,481 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: mesh_2d_dor_router_multi_sw +// Description: +// Alternate implementation for mesh_2d_dor_router_single_sw with +// multiple switches for independent paths between inputs and outputs +// **NOTE**: This module has not been validated + +module mesh_2d_dor_router_multi_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH} + parameter SWITCH_ALLOC = "PRIO" // Switch allocation algorithm {ROUND-ROBIN, PRIO} +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_ter_tdata, + input wire s_axis_ter_tlast, + input wire s_axis_ter_tvalid, 
+ output wire s_axis_ter_tready, + output wire [WIDTH-1:0] m_axis_ter_tdata, + output wire m_axis_ter_tlast, + output wire m_axis_ter_tvalid, + input wire m_axis_ter_tready, + + // West inter-router connections + input wire [WIDTH-1:0] s_axis_wst_tdata, + input wire [0:0] s_axis_wst_tdest, + input wire s_axis_wst_tlast, + input wire s_axis_wst_tvalid, + output wire s_axis_wst_tready, + output wire [WIDTH-1:0] m_axis_wst_tdata, + output wire [0:0] m_axis_wst_tdest, + output wire m_axis_wst_tlast, + output wire m_axis_wst_tvalid, + input wire m_axis_wst_tready, + + // East inter-router connections + input wire [WIDTH-1:0] s_axis_est_tdata, + input wire [0:0] s_axis_est_tdest, + input wire s_axis_est_tlast, + input wire s_axis_est_tvalid, + output wire s_axis_est_tready, + output wire [WIDTH-1:0] m_axis_est_tdata, + output wire [0:0] m_axis_est_tdest, + output wire m_axis_est_tlast, + output wire m_axis_est_tvalid, + input wire m_axis_est_tready, + + // North inter-router connections + input wire [WIDTH-1:0] s_axis_nor_tdata, + input wire [0:0] s_axis_nor_tdest, + input wire s_axis_nor_tlast, + input wire s_axis_nor_tvalid, + output wire s_axis_nor_tready, + output wire [WIDTH-1:0] m_axis_nor_tdata, + output wire [0:0] m_axis_nor_tdest, + output wire m_axis_nor_tlast, + output wire m_axis_nor_tvalid, + input wire m_axis_nor_tready, + + // South inter-router connections + input wire [WIDTH-1:0] s_axis_sou_tdata, + input wire [0:0] s_axis_sou_tdest, + input wire s_axis_sou_tlast, + input wire s_axis_sou_tvalid, + output wire s_axis_sou_tready, + output wire [WIDTH-1:0] m_axis_sou_tdata, + output wire [0:0] m_axis_sou_tdest, + output wire m_axis_sou_tlast, + output wire m_axis_sou_tvalid, + input wire m_axis_sou_tready +); + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + `include "mesh_node_mapping.vh" + + function [2:0] term_route; + input [WIDTH-1:0] header; + reg 
[$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + term_route = 3'd0; //TER + end else if (xdst == XB_ADDR_X) begin + if (ydiff < 0) + term_route = 3'd3; //NOR + else + term_route = 3'd4; //SOU + end else begin + if (xdiff < 0) + term_route = 3'd1; //WST + else + term_route = 3'd2; //EST + end + end + endfunction + + // Output select for packets that arrive on an X-dimension (west/east) port: + // 0 = terminal, 1 = keep going along X, 2 = north, 3 = south + function [1:0] xdim_route; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + xdim_route = 2'd0; //TER + end else if (xdst == XB_ADDR_X) begin + if (ydiff < 0) + xdim_route = 2'd2; //NOR + else + xdim_route = 2'd3; //SOU + end else begin + xdim_route = 2'd1; //Forward + end + end + endfunction + + // Output select for packets that arrive on a Y-dimension (north/south) port: + // 0 = terminal, 1 = keep going along Y + function [0:0] ydim_route; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + ydim_route = 1'd0; //TER + end else begin + // Every packet that is not for this node is forwarded. This also gives + // the function a defined result when xdst != XB_ADDR_X, which + // previously left the return value unassigned. + // NOTE(review): with X-then-Y routing that case presumably never occurs + // for well-formed traffic -- confirm. + ydim_route = 1'd1; //Forward + end + end + endfunction + + + // ------------------------------------------------- + // Input buffers + // ------------------------------------------------- + wire [WIDTH-1:0] ter_i_tdata; + wire ter_i_tlast; + wire ter_i_tvalid; + wire ter_i_tready; + + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_ter_tdata), + .i_tlast 
(s_axis_ter_tlast), + .i_tvalid (s_axis_ter_tvalid), + .i_tready (s_axis_ter_tready), + .i_terror (1'b0), + .o_tdata (ter_i_tdata), + .o_tlast (ter_i_tlast), + .o_tvalid (ter_i_tvalid), + .o_tready (ter_i_tready) + ); + + wire [WIDTH-1:0] wst_i_tdata, est_i_tdata, nor_i_tdata, sou_i_tdata; + wire wst_i_tlast, est_i_tlast, nor_i_tlast, sou_i_tlast; + wire wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid; + wire wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready; + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) wst_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_wst_tdata), + .s_axis_tdest (s_axis_wst_tdest), + .s_axis_tlast (s_axis_wst_tlast), + .s_axis_tvalid (s_axis_wst_tvalid), + .s_axis_tready (s_axis_wst_tready), + .m_axis_tdata (wst_i_tdata), + .m_axis_tlast (wst_i_tlast), + .m_axis_tvalid (wst_i_tvalid), + .m_axis_tready (wst_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) est_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_est_tdata), + .s_axis_tdest (s_axis_est_tdest), + .s_axis_tlast (s_axis_est_tlast), + .s_axis_tvalid (s_axis_est_tvalid), + .s_axis_tready (s_axis_est_tready), + .m_axis_tdata (est_i_tdata), + .m_axis_tlast (est_i_tlast), + .m_axis_tvalid (est_i_tvalid), + .m_axis_tready (est_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) nor_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_nor_tdata), + .s_axis_tdest (s_axis_nor_tdest), + .s_axis_tlast (s_axis_nor_tlast), + .s_axis_tvalid (s_axis_nor_tvalid), + .s_axis_tready (s_axis_nor_tready), + .m_axis_tdata (nor_i_tdata), + .m_axis_tlast (nor_i_tlast), + .m_axis_tvalid (nor_i_tvalid), + .m_axis_tready (nor_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + 
.ROUTING(ROUTING_ALLOC) + ) sou_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_sou_tdata), + .s_axis_tdest (s_axis_sou_tdest), + .s_axis_tlast (s_axis_sou_tlast), + .s_axis_tvalid (s_axis_sou_tvalid), + .s_axis_tready (s_axis_sou_tready), + .m_axis_tdata (sou_i_tdata), + .m_axis_tlast (sou_i_tlast), + .m_axis_tvalid (sou_i_tvalid), + .m_axis_tready (sou_i_tready) + ); + + // ------------------------------------------------- + // Input demuxes + // ------------------------------------------------- + + wire [WIDTH-1:0] t2t_tdata, t2w_tdata, t2e_tdata, t2n_tdata, t2s_tdata; + wire t2t_tlast, t2w_tlast, t2e_tlast, t2n_tlast, t2s_tlast; + wire t2t_tvalid, t2w_tvalid, t2e_tvalid, t2n_tvalid, t2s_tvalid; + wire t2t_tready, t2w_tready, t2e_tready, t2n_tready, t2s_tready; + + wire [WIDTH-1:0] w2t_tdata, w2e_tdata, w2n_tdata, w2s_tdata; + wire w2t_tlast, w2e_tlast, w2n_tlast, w2s_tlast; + wire w2t_tvalid, w2e_tvalid, w2n_tvalid, w2s_tvalid; + wire w2t_tready, w2e_tready, w2n_tready, w2s_tready; + + wire [WIDTH-1:0] e2t_tdata, e2w_tdata, e2n_tdata, e2s_tdata; + wire e2t_tlast, e2w_tlast, e2n_tlast, e2s_tlast; + wire e2t_tvalid, e2w_tvalid, e2n_tvalid, e2s_tvalid; + wire e2t_tready, e2w_tready, e2n_tready, e2s_tready; + + wire [WIDTH-1:0] n2t_tdata, n2s_tdata; + wire n2t_tlast, n2s_tlast; + wire n2t_tvalid, n2s_tvalid; + wire n2t_tready, n2s_tready; + + wire [WIDTH-1:0] s2t_tdata, s2n_tdata; + wire s2t_tlast, s2n_tlast; + wire s2t_tvalid, s2n_tvalid; + wire s2t_tready, s2n_tready; + + wire [WIDTH-1:0] ter_i_hdr, wst_i_hdr, est_i_hdr, nor_i_hdr, sou_i_hdr; + + axi_demux #( + .WIDTH(WIDTH), .SIZE(5), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) ter_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (ter_i_hdr), + .dest (term_route(ter_i_hdr)), + .i_tdata (ter_i_tdata), + .i_tlast (ter_i_tlast), + .i_tvalid (ter_i_tvalid), + .i_tready (ter_i_tready), + .o_tdata ({t2s_tdata, t2n_tdata, t2e_tdata, t2w_tdata, t2t_tdata}), + 
.o_tlast ({t2s_tlast, t2n_tlast, t2e_tlast, t2w_tlast, t2t_tlast}), + .o_tvalid ({t2s_tvalid, t2n_tvalid, t2e_tvalid, t2w_tvalid, t2t_tvalid}), + .o_tready ({t2s_tready, t2n_tready, t2e_tready, t2w_tready, t2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) wst_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (wst_i_hdr), + .dest (xdim_route(wst_i_hdr)), + .i_tdata (wst_i_tdata), + .i_tlast (wst_i_tlast), + .i_tvalid (wst_i_tvalid), + .i_tready (wst_i_tready), + .o_tdata ({w2s_tdata, w2n_tdata, w2e_tdata, w2t_tdata}), + .o_tlast ({w2s_tlast, w2n_tlast, w2e_tlast, w2t_tlast}), + .o_tvalid ({w2s_tvalid, w2n_tvalid, w2e_tvalid, w2t_tvalid}), + .o_tready ({w2s_tready, w2n_tready, w2e_tready, w2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) est_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (est_i_hdr), + .dest (xdim_route(est_i_hdr)), + .i_tdata (est_i_tdata), + .i_tlast (est_i_tlast), + .i_tvalid (est_i_tvalid), + .i_tready (est_i_tready), + .o_tdata ({e2s_tdata, e2n_tdata, e2w_tdata, e2t_tdata}), + .o_tlast ({e2s_tlast, e2n_tlast, e2w_tlast, e2t_tlast}), + .o_tvalid ({e2s_tvalid, e2n_tvalid, e2w_tvalid, e2t_tvalid}), + .o_tready ({e2s_tready, e2n_tready, e2w_tready, e2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) nor_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (nor_i_hdr), + .dest (ydim_route(nor_i_hdr)), + .i_tdata (nor_i_tdata), + .i_tlast (nor_i_tlast), + .i_tvalid (nor_i_tvalid), + .i_tready (nor_i_tready), + .o_tdata ({n2t_tdata, n2s_tdata}), + .o_tlast ({n2t_tlast, n2s_tlast}), + .o_tvalid ({n2t_tvalid, n2s_tvalid}), + .o_tready ({n2t_tready, n2s_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) sou_i_demux_i 
( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (sou_i_hdr), + .dest (ydim_route(sou_i_hdr)), + .i_tdata (sou_i_tdata), + .i_tlast (sou_i_tlast), + .i_tvalid (sou_i_tvalid), + .i_tready (sou_i_tready), + .o_tdata ({s2t_tdata, s2n_tdata}), + .o_tlast ({s2t_tlast, s2n_tlast}), + .o_tvalid ({s2t_tvalid, s2n_tvalid}), + .o_tready ({s2t_tready, s2n_tready}) + ); + + // ------------------------------------------------- + // Output muxes + // ------------------------------------------------- + + axi_mux #( + .WIDTH(WIDTH), .SIZE(5), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) ter_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2t_tdata, w2t_tdata, e2t_tdata, n2t_tdata, s2t_tdata}), + .i_tlast ({t2t_tlast, w2t_tlast, e2t_tlast, n2t_tlast, s2t_tlast}), + .i_tvalid ({t2t_tvalid, w2t_tvalid, e2t_tvalid, n2t_tvalid, s2t_tvalid}), + .i_tready ({t2t_tready, w2t_tready, e2t_tready, n2t_tready, s2t_tready}), + .o_tdata (m_axis_ter_tdata), + .o_tlast (m_axis_ter_tlast), + .o_tvalid (m_axis_ter_tvalid), + .o_tready (m_axis_ter_tready) + ); + + axi_mux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) wst_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2w_tdata, e2w_tdata}), + .i_tlast ({t2w_tlast, e2w_tlast}), + .i_tvalid ({t2w_tvalid, e2w_tvalid}), + .i_tready ({t2w_tready, e2w_tready}), + .o_tdata (m_axis_wst_tdata), + .o_tlast (m_axis_wst_tlast), + .o_tvalid (m_axis_wst_tvalid), + .o_tready (m_axis_wst_tready) + ); + assign m_axis_wst_tdest = 1'b0; + + axi_mux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) est_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2e_tdata, w2e_tdata}), + .i_tlast ({t2e_tlast, w2e_tlast}), + .i_tvalid ({t2e_tvalid, w2e_tvalid}), + .i_tready ({t2e_tready, w2e_tready}), + .o_tdata (m_axis_est_tdata), + .o_tlast (m_axis_est_tlast), + .o_tvalid (m_axis_est_tvalid), + + + .o_tready (m_axis_est_tready) + ); + assign 
m_axis_est_tdest = 1'b0; + + axi_mux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) nor_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2n_tdata, w2n_tdata, e2n_tdata, s2n_tdata}), + .i_tlast ({t2n_tlast, w2n_tlast, e2n_tlast, s2n_tlast}), + .i_tvalid ({t2n_tvalid, w2n_tvalid, e2n_tvalid, s2n_tvalid}), + .i_tready ({t2n_tready, w2n_tready, e2n_tready, s2n_tready}), + .o_tdata (m_axis_nor_tdata), + .o_tlast (m_axis_nor_tlast), + .o_tvalid (m_axis_nor_tvalid), + .o_tready (m_axis_nor_tready) + ); + assign m_axis_nor_tdest = 1'b0; + + axi_mux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) sou_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2s_tdata, w2s_tdata, e2s_tdata, n2s_tdata}), + .i_tlast ({t2s_tlast, w2s_tlast, e2s_tlast, n2s_tlast}), + .i_tvalid ({t2s_tvalid, w2s_tvalid, e2s_tvalid, n2s_tvalid}), + .i_tready ({t2s_tready, w2s_tready, e2s_tready, n2s_tready}), + .o_tdata (m_axis_sou_tdata), + .o_tlast (m_axis_sou_tlast), + .o_tvalid (m_axis_sou_tvalid), + .o_tready (m_axis_sou_tready) + ); + assign m_axis_sou_tdest = 1'b0; + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v new file mode 100644 index 000000000..65cded545 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v @@ -0,0 +1,398 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: mesh_2d_dor_router_single_sw
// Description:
//   This module implements the router for a 2-dimensional (2d)
//   mesh network that uses dimension order routing (dor) and has a
//   single underlying switch (single_sw). It uses AXI-Stream for all of its
//   links.
//   The mesh topology, routing algorithms and the router architecture are
//   described in README.md in this directory.
// Parameters:
//   - WIDTH: Width of the AXI-Stream data bus
//   - DIM_SIZE: Number of routers along one dimension
//   - XB_ADDR_X: The X-coordinate of this router in the topology
//   - XB_ADDR_Y: The Y-coordinate of this router in the topology
//   - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
//   - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
//   - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
//     * WORMHOLE: Allocate route as soon as first word in pkt arrives
//     * CUT-THROUGH: Allocate route only after the full pkt arrives
//   - SWITCH_ALLOC: Algorithm to allocate the switch
//     * PRIO: Priority based. Priority: South > East > North > West > Term
//     * ROUND-ROBIN: Round robin input port allocation
// Signals:
//   - *_axis_ter_*: Terminal ports (master/slave)
//   - *_axis_wst_*: Inter-router X-dim west connections (master/slave)
//   - *_axis_est_*: Inter-router X-dim east connections (master/slave)
//   - *_axis_nor_*: Inter-router Y-dim north connections (master/slave)
//   - *_axis_sou_*: Inter-router Y-dim south connections (master/slave)
//

module mesh_2d_dor_router_single_sw #(
  parameter                        WIDTH          = 64,
  parameter                        DIM_SIZE       = 4,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X      = 0,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y      = 0,
  parameter                        TERM_BUFF_SIZE = 5,
  parameter                        XB_BUFF_SIZE   = 5,
  parameter                        ROUTING_ALLOC  = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}
  parameter                        SWITCH_ALLOC   = "PRIO"      // Switch allocation algorithm {ROUND-ROBIN, PRIO}
) (
  // Clocks and resets
  input  wire             clk,
  input  wire             reset,

  // Terminal connections
  input  wire [WIDTH-1:0] s_axis_ter_tdata,
  input  wire             s_axis_ter_tlast,
  input  wire             s_axis_ter_tvalid,
  output wire             s_axis_ter_tready,
  output wire [WIDTH-1:0] m_axis_ter_tdata,
  output wire             m_axis_ter_tlast,
  output wire             m_axis_ter_tvalid,
  input  wire             m_axis_ter_tready,

  // West inter-router connections
  input  wire [WIDTH-1:0] s_axis_wst_tdata,
  input  wire [0:0]       s_axis_wst_tdest,
  input  wire             s_axis_wst_tlast,
  input  wire             s_axis_wst_tvalid,
  output wire             s_axis_wst_tready,
  output wire [WIDTH-1:0] m_axis_wst_tdata,
  output wire [0:0]       m_axis_wst_tdest,
  output wire             m_axis_wst_tlast,
  output wire             m_axis_wst_tvalid,
  input  wire             m_axis_wst_tready,

  // East inter-router connections
  input  wire [WIDTH-1:0] s_axis_est_tdata,
  input  wire [0:0]       s_axis_est_tdest,
  input  wire             s_axis_est_tlast,
  input  wire             s_axis_est_tvalid,
  output wire             s_axis_est_tready,
  output wire [WIDTH-1:0] m_axis_est_tdata,
  output wire [0:0]       m_axis_est_tdest,
  output wire             m_axis_est_tlast,
  output wire             m_axis_est_tvalid,
  input  wire             m_axis_est_tready,

  // North inter-router connections
  input  wire [WIDTH-1:0] s_axis_nor_tdata,
  input  wire [0:0]       s_axis_nor_tdest,
  input  wire             s_axis_nor_tlast,
  input  wire             s_axis_nor_tvalid,
  output wire             s_axis_nor_tready,
  output wire [WIDTH-1:0] m_axis_nor_tdata,
  output wire [0:0]       m_axis_nor_tdest,
  output wire             m_axis_nor_tlast,
  output wire             m_axis_nor_tvalid,
  input  wire             m_axis_nor_tready,

  // South inter-router connections
  input  wire [WIDTH-1:0] s_axis_sou_tdata,
  input  wire [0:0]       s_axis_sou_tdest,
  input  wire             s_axis_sou_tlast,
  input  wire             s_axis_sou_tvalid,
  output wire             s_axis_sou_tready,
  output wire [WIDTH-1:0] m_axis_sou_tdata,
  output wire [0:0]       m_axis_sou_tdest,
  output wire             m_axis_sou_tlast,
  output wire             m_axis_sou_tvalid,
  input  wire             m_axis_sou_tready
);
  // -------------------------------------------------
  // Routing functions
  // -------------------------------------------------

  // mesh_node_mapping.vh file contains the mapping between the node number
  // and its XY coordinates. It is autogenerated and defines the node_to_xdst
  // and node_to_ydst functions.
  `include "mesh_node_mapping.vh"

  // Switch port indices. The same encoding is used for the source port, the
  // destination port and the bit positions of the 5-bit request vectors.
  localparam [2:0] SW_DEST_TER  = 3'd0;
  localparam [2:0] SW_DEST_WST  = 3'd1;
  localparam [2:0] SW_DEST_EST  = 3'd2;
  localparam [2:0] SW_DEST_NOR  = 3'd3;
  localparam [2:0] SW_DEST_SOU  = 3'd4;
  localparam [2:0] SW_NUM_DESTS = 3'd5;

  // The compute_switch_tdest function is the destination selector
  // i.e. it will inspect the header (first word) of a packet, map the
  // destination node to XY coordinates and determine the output port.
  //   header: first word of the packet (passed to node_to_xdst/node_to_ydst)
  //   src:    SW_DEST_* index of the port the packet arrived on
  //   return: {VC, 3-bit SW_DEST_* output port}
  function [3:0] compute_switch_tdest;
    input [WIDTH-1:0] header;
    input [3:0]       src;
    reg [$clog2(DIM_SIZE)-1:0]      xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
    reg [3:0]                       route;
    begin
      xdst  = node_to_xdst(header);
      ydst  = node_to_ydst(header);
      // One extra (sign) bit so that "destination is west/north of us"
      // shows up as a negative difference.
      xdiff = xdst - XB_ADDR_X;
      ydiff = ydst - XB_ADDR_Y;
      // Routing logic
      // - MSB is the VC, 3 LSBs are the router destination
      // - X is resolved before Y (dimension order routing)
      if (xdiff == 'd0 && ydiff == 'd0) begin
        // VC=0 because terminals don't have VCs
        route = {1'b0, SW_DEST_TER};
      end else if (xdiff == 'd0) begin
        // VC=1 for CCW turns and VC=0 for everything else
        if (ydiff < 0)
          route = {(src == SW_DEST_WST), SW_DEST_NOR};
        else
          route = {(src == SW_DEST_EST), SW_DEST_SOU};
      end else begin
        // VC=0 because east-west paths don't have VCs
        if (xdiff < 0)
          route = {1'b0, SW_DEST_WST};
        else
          route = {1'b0, SW_DEST_EST};
      end
      // Simulation-only sanity checks: a router on the edge of the mesh must
      // never route off that edge. Skip them while the coordinates are
      // unknown. A plain "!= 'hx" always evaluates to X (i.e. false), so the
      // reduction-XOR with a case inequality is used to detect X/Z bits.
      // Only the 3 destination bits are compared; the VC bit is ignored.
      if ((^{xdst, ydst}) !== 1'bx) begin
        if (XB_ADDR_X == 0 && route[2:0] == SW_DEST_WST)
          $display("Illegal route chosen: WEST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
        if (XB_ADDR_X == DIM_SIZE-1 && route[2:0] == SW_DEST_EST)
          $display("Illegal route chosen: EAST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
        if (XB_ADDR_Y == 0 && route[2:0] == SW_DEST_NOR)
          $display("Illegal route chosen: NORTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
        if (XB_ADDR_Y == DIM_SIZE-1 && route[2:0] == SW_DEST_SOU)
          $display("Illegal route chosen: SOUTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
      end
      compute_switch_tdest = route;
      //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest);
    end
  endfunction

  // The compute_switch_alloc function is the switch allocation function
  // i.e. it chooses which input port reserves the switch for packet transfer.
  // After the switch is allocated, all other ports will be backpressured until
  // the packet finishes transferring.
  //   pkt_waiting: one request bit per input port, indexed by SW_DEST_*
  //   last_alloc:  port that was granted the previous packet
  //   return:      SW_DEST_* index of the port to grant
  function [2:0] compute_switch_alloc;
    input [4:0] pkt_waiting;
    input [2:0] last_alloc;
    integer     offs;
    integer     cand;
    begin
      if (pkt_waiting == 5'b00000) begin
        // Nothing is waiting: park the switch on the terminal port
        compute_switch_alloc = SW_DEST_TER;
      end else if (pkt_waiting == 5'b00001) begin
        compute_switch_alloc = SW_DEST_TER;
      end else if (pkt_waiting == 5'b00010) begin
        compute_switch_alloc = SW_DEST_WST;
      end else if (pkt_waiting == 5'b00100) begin
        compute_switch_alloc = SW_DEST_EST;
      end else if (pkt_waiting == 5'b01000) begin
        compute_switch_alloc = SW_DEST_NOR;
      end else if (pkt_waiting == 5'b10000) begin
        compute_switch_alloc = SW_DEST_SOU;
      end else begin
        // More than one port is requesting the switch
        if (SWITCH_ALLOC == "PRIO") begin
          // Priority: South > East > North > West > Term
          if (pkt_waiting[SW_DEST_SOU])
            compute_switch_alloc = SW_DEST_SOU;
          else if (pkt_waiting[SW_DEST_EST])
            compute_switch_alloc = SW_DEST_EST;
          else if (pkt_waiting[SW_DEST_NOR])
            compute_switch_alloc = SW_DEST_NOR;
          else if (pkt_waiting[SW_DEST_WST])
            compute_switch_alloc = SW_DEST_WST;
          else
            compute_switch_alloc = SW_DEST_TER;
        end else begin
          // Round-robin: grant the first requesting port after last_alloc.
          // The port index is computed with integer (32-bit) arithmetic
          // because last_alloc + 4 does not fit in 3 bits when last_alloc is
          // SW_DEST_SOU; in 3-bit math the sum wraps to 0 and the north port
          // is never probed. The loop walks the offsets from farthest to
          // nearest so that the nearest requesting port wins.
          compute_switch_alloc = last_alloc;
          for (offs = SW_NUM_DESTS - 1; offs >= 1; offs = offs - 1) begin
            cand = (last_alloc + offs) % SW_NUM_DESTS;
            if (pkt_waiting[cand])
              compute_switch_alloc = cand[2:0];
          end
        end
      end
      //$display("pkt_waiting=%b, alloc=%d, last_alloc=%d", pkt_waiting, compute_switch_alloc, last_alloc);
    end
  endfunction

  // -------------------------------------------------
  // Input buffers
  // -------------------------------------------------
  wire [WIDTH-1:0] ter_i_tdata;
  wire [3:0]       ter_i_tdest;
  wire             ter_i_tlast;
  wire             ter_i_tvalid;
  wire             ter_i_tready;

  // Data coming in from the terminal is gated until a full packet arrives
  // in order to minimize the switch allocation time per packet.
  axi_packet_gate #(
    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
  ) term_in_pkt_gate_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  (s_axis_ter_tdata),
    .i_tlast  (s_axis_ter_tlast),
    .i_tvalid (s_axis_ter_tvalid),
    .i_tready (s_axis_ter_tready),
    .i_terror (1'b0),
    .o_tdata  (ter_i_tdata),
    .o_tlast  (ter_i_tlast),
    .o_tvalid (ter_i_tvalid),
    .o_tready (ter_i_tready)
  );
  assign ter_i_tdest = compute_switch_tdest(ter_i_tdata, SW_DEST_TER);

  wire [WIDTH-1:0] wst_i_tdata,  est_i_tdata,  nor_i_tdata,  sou_i_tdata;
  wire [3:0]       wst_i_tdest,  est_i_tdest,  nor_i_tdest,  sou_i_tdest;
  wire             wst_i_tlast,  est_i_tlast,  nor_i_tlast,  sou_i_tlast;
  wire             wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
  wire             wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) wst_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_wst_tdata),
    .s_axis_tdest  (s_axis_wst_tdest),
    .s_axis_tlast  (s_axis_wst_tlast),
    .s_axis_tvalid (s_axis_wst_tvalid),
    .s_axis_tready (s_axis_wst_tready),
    .m_axis_tdata  (wst_i_tdata),
    .m_axis_tlast  (wst_i_tlast),
    .m_axis_tvalid (wst_i_tvalid),
    .m_axis_tready (wst_i_tready)
  );
  assign wst_i_tdest = compute_switch_tdest(wst_i_tdata, SW_DEST_WST);

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) est_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_est_tdata),
    .s_axis_tdest  (s_axis_est_tdest),
    .s_axis_tlast  (s_axis_est_tlast),
    .s_axis_tvalid (s_axis_est_tvalid),
    .s_axis_tready (s_axis_est_tready),
    .m_axis_tdata  (est_i_tdata),
    .m_axis_tlast  (est_i_tlast),
    .m_axis_tvalid (est_i_tvalid),
    .m_axis_tready (est_i_tready)
  );
  assign est_i_tdest = compute_switch_tdest(est_i_tdata, SW_DEST_EST);

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) nor_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_nor_tdata),
    .s_axis_tdest  (s_axis_nor_tdest),
    .s_axis_tlast  (s_axis_nor_tlast),
    .s_axis_tvalid (s_axis_nor_tvalid),
    .s_axis_tready (s_axis_nor_tready),
    .m_axis_tdata  (nor_i_tdata),
    .m_axis_tlast  (nor_i_tlast),
    .m_axis_tvalid (nor_i_tvalid),
    .m_axis_tready (nor_i_tready)
  );
  assign nor_i_tdest = compute_switch_tdest(nor_i_tdata, SW_DEST_NOR);

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) sou_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_sou_tdata),
    .s_axis_tdest  (s_axis_sou_tdest),
    .s_axis_tlast  (s_axis_sou_tlast),
    .s_axis_tvalid (s_axis_sou_tvalid),
    .s_axis_tready (s_axis_sou_tready),
    .m_axis_tdata  (sou_i_tdata),
    .m_axis_tlast  (sou_i_tlast),
    .m_axis_tvalid (sou_i_tvalid),
    .m_axis_tready (sou_i_tready)
  );
  assign sou_i_tdest = compute_switch_tdest(sou_i_tdata, SW_DEST_SOU);

  //-------------------------------------------------
  // Switch
  //-------------------------------------------------
  // Track the input packet state
  localparam [0:0] PKT_ST_HEAD = 1'b0;
  localparam [0:0] PKT_ST_BODY = 1'b1;
  reg [0:0] pkt_state = PKT_ST_HEAD;

  // The switch only accepts packets on a single port at a time, so OR-ing
  // the per-port handshake signals yields the handshake of the active port.
  wire sw_in_ready = |({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready});
  wire sw_in_valid = |({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid});
  wire sw_in_last  = |({sou_i_tlast & sou_i_tvalid, nor_i_tlast & nor_i_tvalid,
                        est_i_tlast & est_i_tvalid, wst_i_tlast & wst_i_tvalid,
                        ter_i_tlast & ter_i_tvalid});

  always @(posedge clk) begin
    if (reset) begin
      pkt_state <= PKT_ST_HEAD;
    end else if (sw_in_valid & sw_in_ready) begin
      // Return to HEAD after the last word; otherwise we are in the body
      pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
    end
  end

  // The switch requires the allocation to stay valid until the
  // end of the packet. We also might need to keep the previous
  // packet's allocation to compute the current one
  wire [2:0] switch_alloc;
  reg  [2:0] prev_switch_alloc = SW_DEST_TER;
  reg  [2:0] pkt_switch_alloc  = SW_DEST_TER;

  always @(posedge clk) begin
    if (reset) begin
      prev_switch_alloc <= SW_DEST_TER;
      pkt_switch_alloc  <= SW_DEST_TER;
    end else if (sw_in_valid & sw_in_ready) begin
      // Latch the grant on the header word so it holds for the whole packet
      if (pkt_state == PKT_ST_HEAD)
        pkt_switch_alloc <= switch_alloc;
      // Remember who was served last (input to round-robin arbitration)
      if (sw_in_last)
        prev_switch_alloc <= switch_alloc;
    end
  end

  // Arbitrate only while waiting for a header word; hold the latched grant
  // for the remainder of the packet.
  assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
    compute_switch_alloc({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}, prev_switch_alloc) :
    pkt_switch_alloc;

  // The terminal port has no VCs, so its tdest output is left unconnected
  wire ter_tdest_discard;
  axis_switch #(
    .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(5), .OUT_PORTS(5)
  ) switch_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  ({sou_i_tdata , nor_i_tdata , est_i_tdata , wst_i_tdata , ter_i_tdata }),
    .s_axis_tdest  ({sou_i_tdest , nor_i_tdest , est_i_tdest , wst_i_tdest , ter_i_tdest }),
    .s_axis_tlast  ({sou_i_tlast , nor_i_tlast , est_i_tlast , wst_i_tlast , ter_i_tlast }),
    .s_axis_tvalid ({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}),
    .s_axis_tready ({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready}),
    .s_axis_alloc  (switch_alloc),
    .m_axis_tdata  ({m_axis_sou_tdata,  m_axis_nor_tdata,  m_axis_est_tdata,  m_axis_wst_tdata,  m_axis_ter_tdata }),
    .m_axis_tdest  ({m_axis_sou_tdest,  m_axis_nor_tdest,  m_axis_est_tdest,  m_axis_wst_tdest,  ter_tdest_discard}),
    .m_axis_tlast  ({m_axis_sou_tlast,  m_axis_nor_tlast,  m_axis_est_tlast,  m_axis_wst_tlast,  m_axis_ter_tlast }),
    .m_axis_tvalid ({m_axis_sou_tvalid, m_axis_nor_tvalid, m_axis_est_tvalid, m_axis_wst_tvalid, m_axis_ter_tvalid}),
    .m_axis_tready ({m_axis_sou_tready, m_axis_nor_tready, m_axis_est_tready, m_axis_wst_tready, m_axis_ter_tready})
  );

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh new file mode 100644 index 000000000..466b0c615 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh @@ -0,0 +1,294 @@ +// Copyright 2018 Ettus Research, A National Instruments Company +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Autogenerated file. Do not modify.
+// $ ./gen_node_to_coord_mapping.py --dimsize 16 --pattern spiral + +parameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround + + +// DIM_SIZE = 2, PATTERN = SPIRAL +//------------------------------------ +// 0 1 +// 3 2 +localparam [3:0] XCOORD_DIM_002 = {1'd0,1'd1,1'd1,1'd0}; +localparam [3:0] YCOORD_DIM_002 = {1'd1,1'd1,1'd0,1'd0}; + +// DIM_SIZE = 3, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 +// 5 0 1 +// 4 3 2 +localparam [17:0] XCOORD_DIM_003 = {2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [17:0] YCOORD_DIM_003 = {2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 4, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 9 +// 5 0 1 10 +// 4 3 2 11 +// 15 14 13 12 +localparam [31:0] XCOORD_DIM_004 = {2'd0,2'd1,2'd2,2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [31:0] YCOORD_DIM_004 = {2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 5, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 +// 19 6 7 8 9 +// 18 5 0 1 10 +// 17 4 3 2 11 +// 16 15 14 13 12 +localparam [74:0] XCOORD_DIM_005 = {3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [74:0] YCOORD_DIM_005 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 6, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 25 +// 19 6 7 8 9 26 +// 18 5 0 1 10 27 +// 17 4 3 2 11 28 +// 16 15 14 13 12 29 +// 35 34 33 32 31 30 +localparam [107:0] XCOORD_DIM_006 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [107:0] YCOORD_DIM_006 = 
{3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 7, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 +// 41 20 21 22 23 24 25 +// 40 19 6 7 8 9 26 +// 39 18 5 0 1 10 27 +// 38 17 4 3 2 11 28 +// 37 16 15 14 13 12 29 +// 36 35 34 33 32 31 30 +localparam [146:0] XCOORD_DIM_007 = {3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [146:0] YCOORD_DIM_007 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 8, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 49 +// 41 20 21 22 23 24 25 50 +// 40 19 6 7 8 9 26 51 +// 39 18 5 0 1 10 27 52 +// 38 17 4 3 2 11 28 53 +// 37 16 15 14 13 12 29 54 +// 36 35 34 33 32 31 30 55 +// 63 62 61 60 59 58 57 56 +localparam [191:0] XCOORD_DIM_008 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [191:0] YCOORD_DIM_008 = 
{3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 9, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 +// 71 42 43 44 45 46 47 48 49 +// 70 41 20 21 22 23 24 25 50 +// 69 40 19 6 7 8 9 26 51 +// 68 39 18 5 0 1 10 27 52 +// 67 38 17 4 3 2 11 28 53 +// 66 37 16 15 14 13 12 29 54 +// 65 36 35 34 33 32 31 30 55 +// 64 63 62 61 60 59 58 57 56 +localparam [323:0] XCOORD_DIM_009 = {4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [323:0] YCOORD_DIM_009 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 10, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 81 +// 71 42 43 44 45 46 47 48 49 82 +// 70 41 20 21 22 23 24 25 50 83 +// 69 40 19 6 7 8 9 26 51 84 +// 68 39 18 5 0 1 10 27 52 85 +// 67 38 17 4 3 2 11 28 53 86 +// 66 37 16 15 14 13 12 29 54 87 +// 65 36 35 34 33 32 31 30 55 88 +// 64 63 62 61 60 59 58 57 56 89 +// 99 98 97 96 95 94 93 92 91 90 +localparam [399:0] 
XCOORD_DIM_010 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [399:0] YCOORD_DIM_010 = {4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 11, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 +// 109 72 73 74 75 76 77 78 79 80 81 +// 108 71 42 43 44 45 46 47 48 49 82 +// 107 70 41 20 21 22 23 24 25 50 83 +// 106 69 40 19 6 7 8 9 26 51 84 +// 105 68 39 18 5 0 1 10 27 52 85 +// 104 67 38 17 4 3 2 11 28 53 86 +// 103 66 37 16 15 14 13 12 29 54 87 +// 102 65 36 35 34 33 32 31 30 55 88 +// 101 64 63 62 61 60 59 58 57 56 89 +// 100 99 98 97 96 95 94 93 92 91 90 +localparam [483:0] XCOORD_DIM_011 = 
{4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [483:0] YCOORD_DIM_011 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 12, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 121 +// 109 72 73 74 75 76 77 78 79 80 81 122 +// 108 71 42 43 44 45 46 47 48 49 82 123 +// 107 70 41 20 21 22 23 24 25 50 83 124 +// 106 69 40 19 6 7 8 9 26 51 84 125 +// 105 68 39 18 5 0 1 10 27 52 85 126 +// 104 67 38 17 4 3 2 11 28 53 86 127 +// 103 66 37 16 15 14 13 12 29 54 87 128 +// 102 65 36 35 34 33 32 31 30 55 88 129 +// 101 64 63 62 61 60 59 58 57 56 89 130 +// 100 99 98 97 96 95 94 93 92 91 90 131 +// 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [575:0] XCOORD_DIM_012 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [575:0] YCOORD_DIM_012 = {4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 13, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 +// 148 103 
66 37 16 15 14 13 12 29 54 87 128 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [675:0] XCOORD_DIM_013 = {4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [675:0] YCOORD_DIM_013 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 14, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [783:0] XCOORD_DIM_014 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [783:0] YCOORD_DIM_014 = 
{4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 15, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [899:0] 
XCOORD_DIM_015 = {4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [899:0] YCOORD_DIM_015 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + +// DIM_SIZE = 16, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 226 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 227 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 228 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 229 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 230 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 231 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 232 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 233 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 234 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 235 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 236 
+// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 237 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 238 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 239 +// 255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 +localparam [1023:0] XCOORD_DIM_016 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [1023:0] YCOORD_DIM_016 = 
{4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + + +function [CLOG2_DIM_SIZE-1:0] node_to_xdst; + input [WIDTH-1:0] header; +begin + if (DIM_SIZE == 2) + node_to_xdst = XCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_xdst = XCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_xdst = XCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_xdst = XCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_xdst = XCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_xdst = XCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_xdst = 
XCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_xdst = XCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_xdst = XCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_xdst = XCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_xdst = XCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_xdst = XCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_xdst = XCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_xdst = XCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_xdst = XCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_xdst = {CLOG2_DIM_SIZE{1'd0}}; // no table for this DIM_SIZE: return all zeros +end endfunction + +function [CLOG2_DIM_SIZE-1:0] node_to_ydst; // Y coordinate for the node index held in the low bits of header, read from the YCOORD table that matches DIM_SIZE + input [WIDTH-1:0] header; // only the low index bits are used; the slice width below grows with DIM_SIZE +begin + if (DIM_SIZE == 2) + node_to_ydst = YCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_ydst = YCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_ydst = YCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_ydst = YCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_ydst = YCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_ydst = YCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_ydst = YCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_ydst = YCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_ydst = YCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_ydst = YCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_ydst = YCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_ydst = YCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_ydst = YCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_ydst = YCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_ydst = 
YCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_ydst = {CLOG2_DIM_SIZE{1'd0}}; +end endfunction + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl new file mode 100644 index 000000000..39440b512 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +# Vivado batch script (Tcl, not Python): run as "vivado -mode tcl -source axis_ctrl_crossbar_nxn_top.tcl" +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# Synthesizes axis_ctrl_crossbar_nxn_top in an in-memory project, then constrains clk to 2.0 ns and writes the utilization/timing report and the checkpoint. + +create_project tmp_proj -part xc7k410tffg900-3 -in_memory +add_files {axis_ctrl_crossbar_nxn_top.v ../axis_ctrl_crossbar_nxn.v ../axis_ctrl_crossbar_2d_mesh.v ../mesh_2d_dor_router_multi_sw.v ../axis_switch.v ../axis_ingress_vc_buff.v ../mesh_node_mapping.vh ../mesh_2d_dor_router_single_sw.v ../torus_2d_dor_router_single_sw.v ../torus_2d_dor_router_multi_sw.v ../axis_port_terminator.v} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top axis_ctrl_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file axis_ctrl_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file axis_ctrl_crossbar_nxn.rpt +write_checkpoint -force axis_ctrl_crossbar_nxn.dcp +close_project +exit
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in new file mode 100644 index 000000000..6805100b9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in @@ -0,0 +1,47 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// Synthesis-only wrapper template: synth_run.py expands the brace placeholders with Python str.format, so do not add literal braces to this file. + +module axis_ctrl_crossbar_nxn_top( + input clk, + input rst +); + // Router global config (placeholders are filled in per synthesis run) + localparam IMPL = "{top}"; + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam ROUTING = "{ralloc}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; // DUT streams connect only to these internal wires; presumably dont_touch keeps synthesis from trimming them (confirm) + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + (* dont_touch = "true"*) wire deadlock_detected; + + axis_ctrl_crossbar_nxn #( + .WIDTH (DWIDTH), + .NPORTS (NPORTS), + .TOPOLOGY (IMPL), + .INGRESS_BUFF_SIZE(MTU), + .ROUTER_BUFF_SIZE (MTU), + .ROUTING_ALLOC (ROUTING), + .SWITCH_ALLOC ("ROUND-ROBIN") + ) router_dut_i ( + .clk (clk), + .reset (rst), + .s_axis_tdata (s_axis_tdata ), + .s_axis_tlast (s_axis_tlast ), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + .m_axis_tdata (m_axis_tdata ), + .m_axis_tlast (m_axis_tlast ), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + .deadlock_detected(deadlock_detected) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl new file mode 100644 index 000000000..304384aee --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +# Vivado batch script (Tcl, not Python): run as "vivado -mode tcl -source chdr_crossbar_nxn_top.tcl" +# +# Copyright 2018 Ettus Research, a National 
Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# Synthesizes chdr_crossbar_nxn_top in an in-memory project, then constrains clk to 2.0 ns and writes the utilization/timing report and the checkpoint. + +create_project tmp_proj -part xc7k410tffg900-3 -in_memory +add_files {chdr_crossbar_nxn_top.v ../chdr_crossbar_nxn.v ../axis_switch.v ../chdr_xb_ingress_buff.v ../chdr_xb_routing_table.v ../../core/chdr_mgmt_pkt_handler.v ../../core/rfnoc_chdr_utils.vh ../../core/rfnoc_chdr_internal_utils.vh} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top chdr_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file chdr_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file chdr_crossbar_nxn.rpt +write_checkpoint -force chdr_crossbar_nxn.dcp +close_project +exit
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in new file mode 100644 index 000000000..fbf0852a3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in @@ -0,0 +1,55 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// Synthesis-only wrapper template: synth_run.py expands the brace placeholders with Python str.format, so do not add literal braces to this file. + +module chdr_crossbar_nxn_top( + input clk, + input rst +); + // Router global config (placeholders are filled in per synthesis run) + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam RLUT_SIZE = {rlutsize}; + localparam OPTIMIZE = "{opt}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; // DUT streams connect only to these internal wires; presumably dont_touch keeps synthesis from trimming them (confirm) + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + + chdr_crossbar_nxn #( + .CHDR_W (DWIDTH), + .NPORTS (NPORTS), + .DEFAULT_PORT (0), + .MTU (MTU), + .ROUTE_TBL_SIZE (RLUT_SIZE), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE (OPTIMIZE), + .NPORTS_MGMT (NPORTS), + .EXT_RTCFG_PORT (1) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (s_axis_tdata), + .s_axis_tlast (s_axis_tlast), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + // Output + .m_axis_tdata (m_axis_tdata), + .m_axis_tlast (m_axis_tlast), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + // External rtcfg port (EXT_RTCFG_PORT is 1, but the inputs are tied to 0 and ack is left unconnected) + .ext_rtcfg_stb (0), + .ext_rtcfg_addr (0), + .ext_rtcfg_data (0), + .ext_rtcfg_ack () + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py new file mode 100755 index 000000000..4ca6e07fa --- /dev/null +++ 
b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! /usr/bin/python3 +# Synthesis sweep for axis_ctrl_crossbar_nxn: calls synth_run.synth_run once per combination of the comma-separated option values. +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'axis_ctrl_crossbar_nxn' + +# Parse command line options; every option is kept as a string holding a comma-separated list +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--top', type=str, default='TORUS', help='Topologies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='32', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='5', help='MTU (CSV)') + parser.add_argument('--ralloc', type=str, default='WORMHOLE', help='Router allocation method (CSV)') + return parser.parse_args() + +def main(): # run one synthesis per element of the cross product of all option lists + args = get_options() + keys = ['top', 'ports', 'dataw', 'mtu', 'ralloc'] # order used by synth_run for the run prefix and the summary CSV columns + for top in args.top.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for ralloc in args.ralloc.strip().split(','): + # Collect parameters; the dict keys match the placeholders of the _top.v.in template + transform = {'ports':ports, 'dataw':dataw, 'mtu':mtu, 'top':top, 'ralloc':ralloc} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py new file mode 100755 index 000000000..668e7a247 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! 
/usr/bin/python3 +# Synthesis sweep for chdr_crossbar_nxn: calls synth_run.synth_run once per combination of the comma-separated option values. +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'chdr_crossbar_nxn' + +# Parse command line options; every option is kept as a string holding a comma-separated list +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--opt', type=str, default='AREA', help='Optimization strategies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='64', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='10', help='MTU or Ingress buffer size (CSV)') + parser.add_argument('--rlutsize', type=str, default='6', help='Router lookup table size (CSV)') + return parser.parse_args() + +def main(): # run one synthesis per element of the cross product of all option lists + args = get_options() + keys = ['opt', 'ports', 'dataw', 'mtu', 'rlutsize'] # order used by synth_run for the run prefix and the summary CSV columns + for opt in args.opt.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for rlutsize in args.rlutsize.strip().split(','): + # Collect parameters; the dict keys match the placeholders of the _top.v.in template + transform = {'opt':opt, 'ports':ports, 'dataw':dataw, 'mtu':mtu, 'rlutsize':rlutsize} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py new file mode 100644 index 000000000..a9801ac20 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py @@ -0,0 +1,67 @@ +#! 
/usr/bin/python3 +# Shared helper for the synth_*.py sweep scripts: expands the Verilog template, runs Vivado and appends one row to the summary CSV. +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import sys, os +import subprocess +import re + +def synth_run(modname, keys, transform): # modname: module/file stem; keys: ordered parameter names; transform: parameter name -> value string + prefix = modname + '_' + ('_'.join(['%s%s'%(k,transform[k]) for k in keys])) + print('='*(len(prefix)+2)) + print(' %s '%(prefix)) + print('='*(len(prefix)+2)) + # Write Verilog top-level file + with open(modname + '_top.v.in', 'r') as in_file: + with open(modname + '_top.v', 'w') as out_file: + out_file.write(in_file.read().format(**transform)) # str.format fills the template placeholders from transform + # Run Vivado + exitcode = subprocess.Popen( + 'vivado -mode tcl -source %s_top.tcl -nolog -nojou'%(modname), shell=True + ).wait() + if exitcode != 0: + raise RuntimeError('Error running vivado. Was setupenv.sh run?') + # Extract info (the values below are what gets reported when no report line matches) + lut = 100.0 + reg = 100.0 + bram = 100.0 + dsp = 100.0 + fmax = 0.0 + with open(modname + '.rpt', 'r') as rpt_file: + rpt = rpt_file.readlines() + for line in rpt: + lm = re.match(r'.*Slice LUTs\*.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if lm is not None: + lut = float(lm.group(1).strip()) + rm = re.match(r'.*Slice Registers.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if rm is not None: + reg = float(rm.group(1).strip()) + bm = re.match(r'.*Block RAM Tile.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if bm is not None: + bram = float(bm.group(1).strip()) + dm = re.match(r'.*DSPs.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if dm is not None: + dsp = float(dm.group(1).strip()) + tm = re.match(r'.*clk.*\| clk\s*\|(.*)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|', line) + if tm is not None: + fmax = 1000.0/float(tm.group(1).strip()) # presumably a delay in ns converted to MHz; confirm against the report format + # Save report + os.rename(modname + '.rpt', prefix + '.rpt') + os.rename(modname + '.dcp', prefix + '.dcp') + for tmp_file in (modname + '_top.v', 'fsm_encoding.os'): # remove each temporary file on its own so one missing file does not skip the other + try: + os.remove(tmp_file) + except FileNotFoundError: + pass + # Write summary report line + res_keys = ['lut','reg','bram','dsp','fmax'] + res = {'lut':lut, 'reg':reg, 'bram':bram, 'dsp':dsp, 
'fmax':fmax, 'prefix':prefix} + if not os.path.exists(modname + '_summary.csv'): + with open(modname + '_summary.csv', 'w') as summaryf: + summaryf.write((','.join(keys + res_keys)) + '\n') + with open(modname + '_summary.csv', 'a') as summaryf: + summaryf.write((','.join(['%s'%(transform[k]) for k in keys])) + ',' + (','.join(['%.1f'%(res[k]) for k in res_keys])) + '\n') diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v new file mode 100644 index 000000000..cd70450a0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v @@ -0,0 +1,338 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: torus_2d_dor_router_multi_sw +// Description: +// Alternate implementation for torus_2d_dor_router_single_sw with +// multiple switches for independent paths between inputs and outputs +// **NOTE**: This module has not been validated + +module torus_2d_dor_router_multi_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE" +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_term_tdata, + input wire s_axis_term_tlast, + input wire s_axis_term_tvalid, + output wire s_axis_term_tready, + output wire [WIDTH-1:0] m_axis_term_tdata, + output wire m_axis_term_tlast, + output wire m_axis_term_tvalid, + input wire m_axis_term_tready, + + // X-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_xdim_tdata, + input wire [0:0] s_axis_xdim_tdest, + input wire s_axis_xdim_tlast, + input wire s_axis_xdim_tvalid, + output wire s_axis_xdim_tready, + output wire [WIDTH-1:0] m_axis_xdim_tdata, + output wire [0:0] 
m_axis_xdim_tdest, + output wire m_axis_xdim_tlast, + output wire m_axis_xdim_tvalid, + input wire m_axis_xdim_tready, + + // Y-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_ydim_tdata, + input wire [0:0] s_axis_ydim_tdest, + input wire s_axis_ydim_tlast, + input wire s_axis_ydim_tvalid, + output wire s_axis_ydim_tready, + output wire [WIDTH-1:0] m_axis_ydim_tdata, + output wire [0:0] m_axis_ydim_tdest, + output wire m_axis_ydim_tlast, + output wire m_axis_ydim_tvalid, + input wire m_axis_ydim_tready +); + + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + `include "mesh_node_mapping.vh" + + function [2:0] term_in_route; // route for packets entering on the terminal port: VC bit plus 2-bit demux output select + input [WIDTH:0] header; // NOTE(review): WIDTH+1 bits here, while term_in_hdr is declared WIDTH bits wide; confirm the intended width + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; // truncated to $clog2(DIM_SIZE) bits; the MSB is used as the VC bit below + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + term_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + term_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out */}; + end else begin + term_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [2:0] xdim_in_route; // route for packets entering on the X-dimension port; same decision logic as term_in_route + input [WIDTH:0] header; // NOTE(review): WIDTH+1 bits here, while xdim_in_hdr is declared WIDTH bits wide; confirm the intended width + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; // truncated to $clog2(DIM_SIZE) bits; the MSB is used as the VC bit below + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + xdim_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + xdim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out 
*/}; + end else begin + xdim_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [1:0] ydim_in_route; // route for packets entering on the Y-dimension port: only the Y coordinate is compared, output is term or ydim + input [WIDTH:0] header; // NOTE(review): WIDTH+1 bits here, while ydim_in_hdr is declared WIDTH bits wide; confirm the intended width + reg [$clog2(DIM_SIZE)-1:0] ydst, ydiff; + begin + ydst = node_to_ydst(header); + ydiff = ydst - XB_ADDR_Y; // truncated to $clog2(DIM_SIZE) bits; the MSB is used as the VC bit below + // Routing logic + // - MSB is the VC, LSB is the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (ydst == XB_ADDR_Y) begin + ydim_in_route = {1'b0 /* VC don't care */, 1'd1 /* term out */}; + end else begin + ydim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 1'd0 /* ydim out */}; + end + end + endfunction + + // ------------------------------------------------- + // Input demuxes + // ------------------------------------------------- + wire [WIDTH-1:0] ti_gt_tdata; + wire ti_gt_tdest; + wire ti_gt_tlast; + wire ti_gt_tvalid; + wire ti_gt_tready; + wire [WIDTH-1:0] t2t_tdata, t2x_tdata, t2y_tdata; + wire t2t_tdest, t2x_tdest, t2y_tdest; + wire t2t_tlast, t2x_tlast, t2y_tlast; + wire t2t_tvalid, t2x_tvalid, t2y_tvalid; + wire t2t_tready, t2x_tready, t2y_tready; + wire [WIDTH-1:0] term_in_hdr; + wire [1:0] term_in_port; + + assign {ti_gt_tdest, term_in_port} = term_in_route(term_in_hdr); // split the route result: VC bit becomes tdest, low 2 bits select the demux output + + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_term_tdata), + .i_tlast (s_axis_term_tlast), + .i_tvalid (s_axis_term_tvalid), + .i_tready (s_axis_term_tready), + .i_terror (1'b0), + .o_tdata (ti_gt_tdata), + .o_tlast (ti_gt_tlast), + .o_tvalid (ti_gt_tvalid), + .o_tready (ti_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) term_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (term_in_hdr), + .dest (term_in_port), + .i_tdata ({ti_gt_tdest, ti_gt_tdata}), + .i_tlast (ti_gt_tlast), + .i_tvalid (ti_gt_tvalid), + .i_tready (ti_gt_tready), + .o_tdata ({t2t_tdest, t2t_tdata, 
t2x_tdest, t2x_tdata, t2y_tdest, t2y_tdata}), + .o_tlast ({t2t_tlast, t2x_tlast, t2y_tlast}), + .o_tvalid ({t2t_tvalid, t2x_tvalid, t2y_tvalid}), + .o_tready ({t2t_tready, t2x_tready, t2y_tready}) + ); + + wire [WIDTH-1:0] xi_gt_tdata; + wire xi_gt_tdest; + wire xi_gt_tlast; + wire xi_gt_tvalid; + wire xi_gt_tready; + wire [WIDTH-1:0] x2t_tdata, x2x_tdata, x2y_tdata; + wire x2t_tdest, x2x_tdest, x2y_tdest; + wire x2t_tlast, x2x_tlast, x2y_tlast; + wire x2t_tvalid, x2x_tvalid, x2y_tvalid; + wire x2t_tready, x2x_tready, x2y_tready; + wire [WIDTH-1:0] xdim_in_hdr; + wire [1:0] xdim_in_port; + + assign {xi_gt_tdest, xdim_in_port} = xdim_in_route(xdim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) xdim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_xdim_tdata), + .s_axis_tdest (s_axis_xdim_tdest), + .s_axis_tlast (s_axis_xdim_tlast), + .s_axis_tvalid (s_axis_xdim_tvalid), + .s_axis_tready (s_axis_xdim_tready), + .m_axis_tdata (xi_gt_tdata), + .m_axis_tlast (xi_gt_tlast), + .m_axis_tvalid (xi_gt_tvalid), + .m_axis_tready (xi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) xdim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (xdim_in_hdr), + .dest (xdim_in_port), + .i_tdata ({xi_gt_tdest, xi_gt_tdata}), + .i_tlast (xi_gt_tlast), + .i_tvalid (xi_gt_tvalid), + .i_tready (xi_gt_tready), + .o_tdata ({x2t_tdest, x2t_tdata, x2x_tdest, x2x_tdata, x2y_tdest, x2y_tdata}), + .o_tlast ({x2t_tlast, x2x_tlast, x2y_tlast}), + .o_tvalid ({x2t_tvalid, x2x_tvalid, x2y_tvalid}), + .o_tready ({x2t_tready, x2x_tready, x2y_tready}) + ); + + wire [WIDTH-1:0] yi_gt_tdata; + wire yi_gt_tdest; + wire yi_gt_tlast; + wire yi_gt_tvalid; + wire yi_gt_tready; + wire [WIDTH-1:0] y2t_tdata, y2y_tdata; + wire y2t_tdest, y2y_tdest; + wire y2t_tlast, y2y_tlast; + wire y2t_tvalid, y2y_tvalid; + wire y2t_tready, 
y2y_tready; + wire [WIDTH-1:0] ydim_in_hdr; + wire [0:0] ydim_in_port; + + assign {yi_gt_tdest, ydim_in_port} = ydim_in_route(ydim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) ydim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_ydim_tdata ), + .s_axis_tdest (s_axis_ydim_tdest ), + .s_axis_tlast (s_axis_ydim_tlast ), + .s_axis_tvalid (s_axis_ydim_tvalid), + .s_axis_tready (s_axis_ydim_tready), + .m_axis_tdata (yi_gt_tdata ), + .m_axis_tlast (yi_gt_tlast ), + .m_axis_tvalid (yi_gt_tvalid), + .m_axis_tready (yi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) ydim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (ydim_in_hdr), + .dest (ydim_in_port), + .i_tdata ({yi_gt_tdest, yi_gt_tdata}), + .i_tlast (yi_gt_tlast), + .i_tvalid (yi_gt_tvalid), + .i_tready (yi_gt_tready), + .o_tdata ({y2t_tdest, y2t_tdata, y2y_tdest, y2y_tdata}), + .o_tlast ({y2t_tlast, y2y_tlast}), + .o_tvalid ({y2t_tvalid, y2y_tvalid}), + .o_tready ({y2t_tready, y2y_tready}) + ); + + // ------------------------------------------------- + // Output muxes + // ------------------------------------------------- + wire term_tdest_discard; + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) term_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2t_tdest, t2t_tdata, x2t_tdest, x2t_tdata, y2t_tdest, y2t_tdata}), + .i_tlast ({t2t_tlast, x2t_tlast, y2t_tlast }), + .i_tvalid ({t2t_tvalid, x2t_tvalid, y2t_tvalid}), + .i_tready ({t2t_tready, x2t_tready, y2t_tready}), + .o_tdata ({term_tdest_discard, m_axis_term_tdata}), + .o_tlast (m_axis_term_tlast), + .o_tvalid (m_axis_term_tvalid), + .o_tready (m_axis_term_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) xdim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear 
(1'b0), + .i_tdata ({t2x_tdest, t2x_tdata, x2x_tdest, x2x_tdata}), + .i_tlast ({t2x_tlast, x2x_tlast}), + .i_tvalid ({t2x_tvalid, x2x_tvalid}), + .i_tready ({t2x_tready, x2x_tready}), + .o_tdata ({m_axis_xdim_tdest, m_axis_xdim_tdata}), + .o_tlast (m_axis_xdim_tlast ), + .o_tvalid (m_axis_xdim_tvalid), + .o_tready (m_axis_xdim_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) ydim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2y_tdest, t2y_tdata, x2y_tdest, x2y_tdata, y2y_tdest, y2y_tdata}), + .i_tlast ({t2y_tlast, x2y_tlast, y2y_tlast }), + .i_tvalid ({t2y_tvalid, x2y_tvalid, y2y_tvalid}), + .i_tready ({t2y_tready, x2y_tready, y2y_tready}), + .o_tdata ({m_axis_ydim_tdest, m_axis_ydim_tdata}), + .o_tlast (m_axis_ydim_tlast), + .o_tvalid (m_axis_ydim_tvalid), + .o_tready (m_axis_ydim_tready) + ); + +endmodule +
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v
new file mode 100644
index 000000000..21a66782d
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v
@@ -0,0 +1,294 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: torus_2d_dor_router_single_sw
+// Description:
+//   This module implements the router for a 2-dimensional (2d)
+//   torus network that uses dimension order routing (dor) and has a
+//   single underlying switch (single_sw). It uses AXI-Stream for all of its
+//   links.
+//   The torus topology, routing algorithms and the router architecture are
+//   described in README.md in this directory.
+// Parameters:
+//   - WIDTH: Width of the AXI-Stream data bus
+//   - DIM_SIZE: Number of routers along one dimension
+//   - XB_ADDR_X: The X-coordinate of this router in the topology
+//   - XB_ADDR_Y: The Y-coordinate of this router in the topology
+//   - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+//   - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+//   - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+//     * WORMHOLE: Allocate route as soon as first word in pkt arrives
+//     * CUT-THROUGH: Allocate route only after the full pkt arrives
+//   - SWITCH_ALLOC: Algorithm to allocate the switch
+//     * PRIO: Priority based. Priority: Y-dim > X-dim > Term
+//     * ROUND-ROBIN: Round robin input port allocation (used for any value other than "PRIO")
+// Signals:
+//   - *_axis_term_*: Terminal ports (master/slave)
+//   - *_axis_xdim_*: Inter-router X-dim connections (master/slave)
+//   - *_axis_ydim_*: Inter-router Y-dim connections (master/slave)
+//
+
+module torus_2d_dor_router_single_sw #(
+  parameter WIDTH = 64,
+  parameter DIM_SIZE = 4,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0,
+  parameter TERM_BUFF_SIZE = 5,
+  parameter XB_BUFF_SIZE = 5,
+  parameter ROUTING_ALLOC = "WORMHOLE",
+  parameter SWITCH_ALLOC = "PRIO"
+) (
+  // Clocks and resets
+  input wire clk,
+  input wire reset,
+
+  // Terminal connections
+  input wire [WIDTH-1:0] s_axis_term_tdata,
+  input wire s_axis_term_tlast,
+  input wire s_axis_term_tvalid,
+  output wire s_axis_term_tready,
+  output wire [WIDTH-1:0] m_axis_term_tdata,
+  output wire m_axis_term_tlast,
+  output wire m_axis_term_tvalid,
+  input wire m_axis_term_tready,
+
+  // X-dimension inter-XB connections
+  input wire [WIDTH-1:0] s_axis_xdim_tdata,
+  input wire [0:0] s_axis_xdim_tdest,
+  input wire s_axis_xdim_tlast,
+  input wire s_axis_xdim_tvalid,
+  output wire s_axis_xdim_tready,
+  output wire [WIDTH-1:0] m_axis_xdim_tdata,
+  output wire [0:0] m_axis_xdim_tdest,
+  output wire m_axis_xdim_tlast,
+  output wire m_axis_xdim_tvalid,
+  input wire m_axis_xdim_tready,
+
+  // Y-dimension inter-XB connections
+  input wire [WIDTH-1:0] s_axis_ydim_tdata,
+  input wire [0:0] s_axis_ydim_tdest,
+  input wire s_axis_ydim_tlast,
+  input wire s_axis_ydim_tvalid,
+  output wire s_axis_ydim_tready,
+  output wire [WIDTH-1:0] m_axis_ydim_tdata,
+  output wire [0:0] m_axis_ydim_tdest,
+  output wire m_axis_ydim_tlast,
+  output wire m_axis_ydim_tvalid,
+  input wire m_axis_ydim_tready
+);
+
+  //-------------------------------------------------
+  // Routing and switch allocation functions
+  //-------------------------------------------------
+
+  // mesh_node_mapping.vh file contains the mapping between the node number
+  // and its XY coordinates. It is autogenerated and defines the node_to_xdst
+  // and node_to_ydst functions.
+  `include "mesh_node_mapping.vh"
+
+  localparam [1:0] SW_DEST_TERM = 2'd0; // index of the terminal in the {ydim, xdim, term} port vectors
+  localparam [1:0] SW_DEST_XDIM = 2'd1; // index of the X-dim port
+  localparam [1:0] SW_DEST_YDIM = 2'd2; // index of the Y-dim port
+  localparam [1:0] SW_NUM_DESTS = 2'd3; // modulus used by the round-robin allocator
+
+  // The compute_switch_tdest function is the destination selector
+  // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the
+  // first word of a packet and determine the destination of the packet.
+  function [2:0] compute_switch_tdest;
+    input [WIDTH-1:0] header;
+    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
+    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; // one bit wider than the coordinates so the difference keeps its sign
+    begin
+      xdst = node_to_xdst(header);
+      ydst = node_to_ydst(header);
+      xdiff = xdst - XB_ADDR_X;
+      ydiff = ydst - XB_ADDR_Y;
+      // Routing logic
+      // - MSB is the VC, 2 LSBs are the router destination
+      // - VC = 1 when the signed coordinate difference is negative, otherwise VC = 0
+      if (xdiff == 'd0 && ydiff == 'd0) begin
+        compute_switch_tdest = {1'b0 /* VC don't care */, SW_DEST_TERM};
+      end else if (xdiff != 'd0) begin // X is resolved first; Y is only used once xdiff is zero
+        compute_switch_tdest = {(xdiff < 0), SW_DEST_XDIM};
+      end else begin
+        compute_switch_tdest = {(ydiff < 0), SW_DEST_YDIM};
+      end
+      //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest);
+    end
+  endfunction
+
+  // The compute_switch_alloc function is the switch allocation function
+  // i.e. it chooses which input port reserves the switch for packet transfer.
+  // After the switch is allocated, all other ports will be backpressured until
+  // the packet finishes transferring.
+  function [1:0] compute_switch_alloc;
+    input [2:0] pkt_waiting; // called with {ydim_in_valid, xdim_in_valid, term_in_valid}
+    input [1:0] last_alloc; // called with prev_switch_alloc
+    begin
+      if (pkt_waiting == 3'b000) begin // nothing waiting: result defaults to the terminal port
+        compute_switch_alloc = SW_DEST_TERM;
+      end else if (pkt_waiting == 3'b001) begin // exactly one port waiting: grant it
+        compute_switch_alloc = SW_DEST_TERM;
+      end else if (pkt_waiting == 3'b010) begin
+        compute_switch_alloc = SW_DEST_XDIM;
+      end else if (pkt_waiting == 3'b100) begin
+        compute_switch_alloc = SW_DEST_YDIM;
+      end else begin // two or more ports waiting: arbitrate
+        if (SWITCH_ALLOC == "PRIO") begin
+          // Priority: Y-dim > X-dim > Term
+          if (pkt_waiting[SW_DEST_YDIM])
+            compute_switch_alloc = SW_DEST_YDIM;
+          else if (pkt_waiting[SW_DEST_XDIM])
+            compute_switch_alloc = SW_DEST_XDIM;
+          else
+            compute_switch_alloc = SW_DEST_TERM;
+        end else begin
+          // Round-robin: first waiting port after last_alloc, in index order modulo SW_NUM_DESTS
+          if (pkt_waiting[(last_alloc + 3'd1) % SW_NUM_DESTS])
+            compute_switch_alloc = (last_alloc + 3'd1) % SW_NUM_DESTS;
+          else if (pkt_waiting[(last_alloc + 3'd2) % SW_NUM_DESTS])
+            compute_switch_alloc = (last_alloc + 3'd2) % SW_NUM_DESTS;
+          else
+            compute_switch_alloc = last_alloc; // neither of the other two ports is waiting
+        end
+      end
+    end
+  endfunction
+
+  //-------------------------------------------------
+  // Ingress buffers
+  //-------------------------------------------------
+  wire [WIDTH-1:0] ydim_in_data , xdim_in_data , term_in_data ;
+  wire [2:0] ydim_in_dest , xdim_in_dest , term_in_dest ; // {VC, 2-bit port} from compute_switch_tdest
+  wire ydim_in_last , xdim_in_last , term_in_last ;
+  wire ydim_in_valid, xdim_in_valid, term_in_valid;
+  wire ydim_in_ready, xdim_in_ready, term_in_ready;
+
+  // Data coming in from the terminal is gated until a full packet arrives
+  // in order to minimize the switch allocation time per packet.
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+  ) term_in_pkt_gate_i (
+    .clk (clk),
+    .reset (reset),
+    .clear (1'b0),
+    .i_tdata (s_axis_term_tdata),
+    .i_tlast (s_axis_term_tlast),
+    .i_tvalid (s_axis_term_tvalid),
+    .i_tready (s_axis_term_tready),
+    .i_terror (1'b0),
+    .o_tdata (term_in_data),
+    .o_tlast (term_in_last),
+    .o_tvalid (term_in_valid),
+    .o_tready (term_in_ready)
+  );
+  assign term_in_dest = compute_switch_tdest(term_in_data); // combinational on the word currently at the buffer output
+
+  // The XY directions have buffers with 2 virtual channels to minimize the
+  // possibility of a deadlock.
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) xdim_in_vc_buf_i (
+    .clk (clk),
+    .reset (reset),
+    .s_axis_tdata (s_axis_xdim_tdata),
+    .s_axis_tdest (s_axis_xdim_tdest),
+    .s_axis_tlast (s_axis_xdim_tlast),
+    .s_axis_tvalid (s_axis_xdim_tvalid),
+    .s_axis_tready (s_axis_xdim_tready),
+    .m_axis_tdata (xdim_in_data),
+    .m_axis_tlast (xdim_in_last),
+    .m_axis_tvalid (xdim_in_valid),
+    .m_axis_tready (xdim_in_ready)
+  );
+  assign xdim_in_dest = compute_switch_tdest(xdim_in_data);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) ydim_in_vc_buf_i (
+    .clk (clk),
+    .reset (reset),
+    .s_axis_tdata (s_axis_ydim_tdata ),
+    .s_axis_tdest (s_axis_ydim_tdest ),
+    .s_axis_tlast (s_axis_ydim_tlast ),
+    .s_axis_tvalid (s_axis_ydim_tvalid),
+    .s_axis_tready (s_axis_ydim_tready),
+    .m_axis_tdata (ydim_in_data ),
+    .m_axis_tlast (ydim_in_last ),
+    .m_axis_tvalid (ydim_in_valid),
+    .m_axis_tready (ydim_in_ready)
+  );
+  assign ydim_in_dest = compute_switch_tdest(ydim_in_data);
+
+  //-------------------------------------------------
+  // Switch
+  //-------------------------------------------------
+
+  // Track the input packet state
+  localparam [0:0] PKT_ST_HEAD = 1'b0;
+  localparam [0:0] PKT_ST_BODY = 1'b1;
+  reg [0:0] pkt_state = PKT_ST_HEAD; // HEAD: the next transfer is the first word of a packet
+
+  // The switch only accepts packets on a single port at a time.
+  wire sw_in_ready = |({ydim_in_ready, xdim_in_ready, term_in_ready});
+  wire sw_in_valid = |({ydim_in_valid, xdim_in_valid, term_in_valid});
+  wire sw_in_last = |({ydim_in_last&ydim_in_valid, xdim_in_last&xdim_in_valid, term_in_last&term_in_valid});
+
+  always @(posedge clk) begin
+    if (reset) begin
+      pkt_state <= PKT_ST_HEAD;
+    end else if (sw_in_valid & sw_in_ready) begin
+      pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
+    end
+  end
+
+  // The switch requires the allocation to stay valid until the
+  // end of the packet. We also might need to keep the previous
+  // packet's allocation to compute the current one
+  wire [1:0] switch_alloc;
+  reg [1:0] prev_switch_alloc = SW_DEST_TERM; // grant in effect on the last transfer flagged by sw_in_last
+  reg [1:0] pkt_switch_alloc = SW_DEST_TERM; // grant latched on a HEAD transfer, reused while not in HEAD
+
+  always @(posedge clk) begin
+    if (reset) begin
+      prev_switch_alloc <= SW_DEST_TERM;
+      pkt_switch_alloc <= SW_DEST_TERM;
+    end else if (sw_in_valid & sw_in_ready) begin
+      if (pkt_state == PKT_ST_HEAD)
+        pkt_switch_alloc <= switch_alloc;
+      if (sw_in_last)
+        prev_switch_alloc <= switch_alloc;
+    end
+  end
+
+  assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
+    compute_switch_alloc({ydim_in_valid, xdim_in_valid, term_in_valid}, prev_switch_alloc) :
+    pkt_switch_alloc;
+
+  wire term_tdest_discard; // this module has no terminal tdest output; the switch's tdest for port 0 is left unread
+  axis_switch #(
+    .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(3), .OUT_PORTS(3)
+  ) switch_i (
+    .clk (clk),
+    .reset (reset),
+    .s_axis_tdata ({ydim_in_data , xdim_in_data , term_in_data }),
+    .s_axis_tdest ({ydim_in_dest , xdim_in_dest , term_in_dest }), // NOTE(review): 3 bits per port with DEST_W=1; presumably DEST_W + port-select bits, confirm in axis_switch
+    .s_axis_tlast ({ydim_in_last , xdim_in_last , term_in_last }),
+    .s_axis_tvalid ({ydim_in_valid, xdim_in_valid, term_in_valid}),
+    .s_axis_tready ({ydim_in_ready, xdim_in_ready, term_in_ready}),
+    .s_axis_alloc (switch_alloc),
+    .m_axis_tdata ({m_axis_ydim_tdata, m_axis_xdim_tdata, m_axis_term_tdata }),
+    .m_axis_tdest ({m_axis_ydim_tdest, m_axis_xdim_tdest, term_tdest_discard}),
+    .m_axis_tlast ({m_axis_ydim_tlast, m_axis_xdim_tlast, m_axis_term_tlast }),
+    .m_axis_tvalid ({m_axis_ydim_tvalid, m_axis_xdim_tvalid, m_axis_term_tvalid}),
+    .m_axis_tready ({m_axis_ydim_tready, m_axis_xdim_tready, m_axis_term_tready})
+  );
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/cvita_hdr_decoder.v b/fpga/usrp3/lib/rfnoc/cvita_hdr_decoder.v
new file mode 100644
index 000000000..289c674e8
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/cvita_hdr_decoder.v
@@ -0,0 +1,32 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Decodes a 128-bit header ({64-bit header word, 64-bit vita_time}) into CVITA header fields. Purely combinational.
+
+module cvita_hdr_decoder (
+  input [127:0] header,
+  output [1:0] pkt_type, output eob, output has_time,
+  output [11:0] seqnum, output [15:0] length, output [15:0] payload_length,
+  output [15:0] src_sid, output [15:0] dst_sid,
+  output [63:0] vita_time
+);
+
+  wire [63:0] hdr[0:1];
+  assign hdr[0] = header[127:64]; // header word
+  assign hdr[1] = header[63:0]; // time word
+
+  assign pkt_type = hdr[0][63:62];
+  assign has_time = hdr[0][61];
+  assign eob = hdr[0][60];
+  assign seqnum = hdr[0][59:48];
+  assign length = hdr[0][47:32];
+  assign src_sid = hdr[0][31:16];
+  assign dst_sid = hdr[0][15:0];
+  assign vita_time = hdr[1]; // passed through regardless of has_time
+
+  assign payload_length = has_time ? length - 16'd16 : length - 16'd8; // length minus 16 when has_time is set, else minus 8
+
+endmodule
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/cvita_hdr_encoder.v b/fpga/usrp3/lib/rfnoc/cvita_hdr_encoder.v
new file mode 100644
index 000000000..82bfcb2ae
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/cvita_hdr_encoder.v
@@ -0,0 +1,22 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Encodes CVITA packet header fields into a 128-bit header word ({header fields, vita_time}). Purely combinational.
+
+module cvita_hdr_encoder (
+  input [1:0] pkt_type, input eob, input has_time,
+  input [11:0] seqnum,
+  input [15:0] payload_length,
+  input [15:0] src_sid, input [15:0] dst_sid,
+  input [63:0] vita_time,
+  output [127:0] header
+);
+
+  assign header = {pkt_type, has_time, eob, seqnum,
+                   payload_length + (has_time ? 16'd16 : 16'd8), // length field: payload_length + 16 when has_time, else + 8
+                   src_sid, dst_sid, vita_time};
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/cvita_hdr_modify.v b/fpga/usrp3/lib/rfnoc/cvita_hdr_modify.v
new file mode 100644
index 000000000..874e1776c
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/cvita_hdr_modify.v
@@ -0,0 +1,36 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Modifies CVITA packet header fields: each use_* input replaces the matching header_in field with the supplied value.
+
+module cvita_hdr_modify (
+  input [127:0] header_in,
+  output [127:0] header_out,
+  input use_pkt_type, input [1:0] pkt_type,
+  input use_has_time, input has_time,
+  input use_eob, input eob,
+  input use_seqnum, input [11:0] seqnum,
+  input use_length, input [15:0] length, // use_length takes precedence over use_payload_length
+  input use_payload_length, input [15:0] payload_length,
+  input use_src_sid, input [15:0] src_sid,
+  input use_dst_sid, input [15:0] dst_sid,
+  input use_vita_time, input [63:0] vita_time
+);
+
+  wire has_time_out = (use_has_time == 1'b1) ? has_time : header_in[125]; // own net: length_adj must not read header_out back
+  wire [15:0] length_adj = payload_length + (has_time_out /* Has time */ ? 16'd16 : 16'd8);
+  assign header_out = {
+    (use_pkt_type == 1'b1) ? pkt_type : header_in[127:126],
+    has_time_out,
+    (use_eob == 1'b1) ? eob : header_in[124],
+    (use_seqnum == 1'b1) ? seqnum : header_in[123:112],
+    (use_length == 1'b1) ? length :
+    (use_payload_length == 1'b1) ? length_adj : header_in[111:96],
+    (use_src_sid == 1'b1) ? src_sid : header_in[95:80],
+    (use_dst_sid == 1'b1) ? dst_sid : header_in[79:64],
+    (use_vita_time == 1'b1) ? vita_time : header_in[63:0]};
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/cvita_hdr_parser.v b/fpga/usrp3/lib/rfnoc/cvita_hdr_parser.v
new file mode 100644
index 000000000..d64c3b6d9
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/cvita_hdr_parser.v
@@ -0,0 +1,89 @@
+//
+// Copyright 2015 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Parses CVITA packet header and outputs discrete signals while passing the stream from i_* to o_*.
+
+module cvita_hdr_parser #(
+  parameter REGISTER = 1 // 0 = No registering, header / vita_time only valid when hdr_stb / vita_time_stb is asserted (lower resource utilization)
+                         // 1 = Header / vita time are registered and valid for the length of entire packet.
+)(
+  input clk, input reset, input clear,
+  output hdr_stb,
+  output [1:0] pkt_type, output eob, output has_time,
+  output [11:0] seqnum, output [15:0] length, output [15:0] payload_length,
+  output [15:0] src_sid, output [15:0] dst_sid,
+  output vita_time_stb,
+  output [63:0] vita_time,
+  input [63:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
+  output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
+);
+
+  generate
+    if (REGISTER) begin
+      axi_fifo_flop2 #(.WIDTH(65)) axi_fifo_flop (
+        .clk(clk), .reset(reset), .clear(clear),
+        .i_tdata({i_tlast,i_tdata}), .i_tvalid(i_tvalid), .i_tready(i_tready),
+        .o_tdata({o_tlast,o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+        .space(), .occupied());
+    end else begin
+      assign o_tdata = i_tdata;
+      assign o_tlast = i_tlast;
+      assign o_tvalid = i_tvalid;
+      assign i_tready = o_tready;
+    end
+  endgenerate
+
+  reg first_line, read_time; // first_line: next accepted word is a header; read_time: next accepted word is the time word
+  wire [63:0] hdr, hdr_vita_time;
+  reg [63:0] hdr_reg, vita_time_reg;
+
+  always @(posedge clk) begin
+    if (reset | clear) begin
+      // After reset / clear the next accepted word is treated as a packet header
+      first_line <= 1'b1;
+      read_time <= 1'b0;
+      hdr_reg <= 64'd0;
+      vita_time_reg <= 64'd0;
+    end else begin
+      if (o_tvalid & o_tready) begin
+        // A word was accepted on the output side of the stream
+        if (first_line) begin
+          hdr_reg <= o_tdata;
+          first_line <= 1'b0;
+          if (has_time & ~o_tlast) begin // a time word follows only if the packet has more words
+            read_time <= 1'b1;
+          end
+        end
+        if (read_time) begin
+          vita_time_reg <= o_tdata;
+          read_time <= 1'b0;
+        end
+        if (o_tlast) begin // overrides the clear above for single-word packets
+          first_line <= 1'b1;
+        end
+      end
+    end
+  end
+
+  // REGISTER = 0: Always use o_tdata, output only valid when hdr_stb = 1
+  // REGISTER = 1: Mux to make sure header output is available immediately and also registered for rest of packet.
+  assign hdr = (hdr_stb | (REGISTER == 0)) ? o_tdata : hdr_reg;
+  assign hdr_vita_time = (vita_time_stb | (REGISTER == 0)) ? o_tdata : vita_time_reg;
+
+  assign hdr_stb = first_line & o_tvalid & o_tready; // asserted on the cycle the header word transfers
+  assign pkt_type = hdr[63:62];
+  assign has_time = hdr[61];
+  assign eob = hdr[60];
+  assign seqnum = hdr[59:48];
+  assign length = hdr[47:32];
+  assign payload_length = length - (has_time ? 16'd16 : 16'd8);
+  assign src_sid = hdr[31:16];
+  assign dst_sid = hdr[15:0];
+
+  assign vita_time_stb = read_time & o_tvalid & o_tready; // asserted on the cycle the time word transfers
+  assign vita_time = hdr_vita_time;
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/data_types.vh b/fpga/usrp3/lib/rfnoc/data_types.vh
new file mode 100644
index 000000000..006deb9c2
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/data_types.vh
@@ -0,0 +1,23 @@
+// CVITA data type descriptor: type name and number of bytes per word
+typedef struct {
+  string name;
+  int bytes_per_word;
+} cvita_data_type_t;
+localparam cvita_data_type_t U16 = '{name:"U16", bytes_per_word:2}; // uint16
+localparam cvita_data_type_t U32 = '{name:"U32", bytes_per_word:4}; // uint32
+localparam cvita_data_type_t U64 = '{name:"U64", bytes_per_word:8}; // uint64
+localparam cvita_data_type_t U128 = '{name:"U128", bytes_per_word:16}; // uint128
+localparam cvita_data_type_t S8 = '{name:"S8", bytes_per_word:1}; // int8
+localparam cvita_data_type_t S16 = '{name:"S16", bytes_per_word:2}; // int16
+localparam cvita_data_type_t S32 = '{name:"S32", bytes_per_word:4}; // int32
+localparam cvita_data_type_t S64 = '{name:"S64", bytes_per_word:8}; // int64
+localparam cvita_data_type_t S128 = '{name:"S128", bytes_per_word:16}; // int128
+localparam cvita_data_type_t SC8 = '{name:"SC8", bytes_per_word:2}; // complex int8
+localparam cvita_data_type_t SC12 = '{name:"SC12", bytes_per_word:3}; // complex int12
+localparam cvita_data_type_t SC16 = '{name:"SC16", bytes_per_word:4}; // complex int16
+localparam cvita_data_type_t SC32 = '{name:"SC32", bytes_per_word:8}; // complex int32
+localparam cvita_data_type_t SC64 = '{name:"SC64", bytes_per_word:16}; // complex int64
+localparam cvita_data_type_t F32 = '{name:"F32", bytes_per_word:4}; // single precision float
+localparam cvita_data_type_t F64 = '{name:"F64", bytes_per_word:8}; // double precision float
+localparam cvita_data_type_t FC32 = '{name:"FC32", bytes_per_word:8}; // single precision complex float
+localparam cvita_data_type_t FC64 = '{name:"FC64", bytes_per_word:16}; // double precision complex float
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/datapath_gatekeeper.v b/fpga/usrp3/lib/rfnoc/datapath_gatekeeper.v
new file mode 100644
index 000000000..0289c6c27
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/datapath_gatekeeper.v
@@ -0,0 +1,62 @@
+//
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Description:
+//   A gatekeeper module for data packets entering and leaving
+//   the user logic in an RFNoC block. This module keeps track
+//   of the packet count for software to detect activity and
+//   provides a mechanism to flush packets from software. Useful
+//   to prevent slow-moving or misbehaving noc blocks from clogging
+//   up the infrastructure.
+
+module datapath_gatekeeper #(
+  parameter WIDTH = 64, // width of s_axis_tdata / m_axis_tdata
+  parameter COUNT_W = 16 // width of pkt_count
+)(
+  // Clocks and resets
+  input wire clk,
+  input wire reset,
+  // Input data stream
+  input wire [WIDTH-1:0] s_axis_tdata,
+  input wire s_axis_tlast,
+  input wire s_axis_tvalid,
+  output wire s_axis_tready,
+  // Output data stream
+  output wire [WIDTH-1:0] m_axis_tdata,
+  output wire m_axis_tlast,
+  output wire m_axis_tvalid,
+  input wire m_axis_tready,
+  // Settings and Status
+  input wire flush, // Drop all packets coming into module
+  output wire flushing, // Is the module still dropping packets?
+  output wire [COUNT_W-1:0] pkt_count // Input packet counter (includes drops)
+);
+
+  axis_strm_monitor #(
+    .WIDTH(1), .COUNT_W(COUNT_W),
+    .PKT_LENGTH_EN(0), .PKT_CHKSUM_EN(0),
+    .PKT_COUNT_EN(1), .XFER_COUNT_EN(0) // only the packet counter is enabled
+  ) monitor_i (
+    .clk(clk), .reset(reset),
+    .axis_tdata(1'b0), .axis_tlast(s_axis_tlast), // data input tied off; only the input stream's handshake signals are connected
+    .axis_tvalid(s_axis_tvalid), .axis_tready(s_axis_tready), // NOTE(review): s_axis_tready is presumably only observed here and driven by flusher_i; confirm port directions
+    .sop(), .eop(),
+    .pkt_length(), .pkt_chksum(),
+    .pkt_count(pkt_count), .xfer_count()
+  );
+
+  axis_packet_flush #(
+    .WIDTH(WIDTH), .FLUSH_PARTIAL_PKTS(0),
+    .TIMEOUT_W(1), .PIPELINE("NONE")
+  ) flusher_i (
+    .clk(clk), .reset(reset),
+    .enable(flush), .timeout(1'b0), .flushing(flushing), .done(), // timeout tied to 0, done left unconnected
+    .s_axis_tdata(s_axis_tdata), .s_axis_tlast(s_axis_tlast),
+    .s_axis_tvalid(s_axis_tvalid), .s_axis_tready(s_axis_tready),
+    .m_axis_tdata(m_axis_tdata), .m_axis_tlast(m_axis_tlast),
+    .m_axis_tvalid(m_axis_tvalid), .m_axis_tready(m_axis_tready)
+  );
+
+endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/ddc.v b/fpga/usrp3/lib/rfnoc/ddc.v new file mode 100644 index 000000000..a14f001ff --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/ddc.v @@ -0,0 +1,635 @@ +// +// Copyright 2016 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +//! RFNoC specific digital down-conversion chain + +module ddc #( + parameter SR_FREQ_ADDR = 0, + parameter SR_SCALE_IQ_ADDR = 1, + parameter SR_DECIM_ADDR = 2, + parameter SR_MUX_ADDR = 3, + parameter SR_COEFFS_ADDR = 4, + parameter PRELOAD_HBS = 1, // Preload half band filter state with 0s + parameter NUM_HB = 3, + parameter CIC_MAX_DECIM = 255, + parameter SAMPLE_WIDTH = 16, + parameter WIDTH = 24 +)( + input clk, input reset, + input clear, // Resets everything except the timed phase inc FIFO and phase inc + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input timed_set_stb, input [7:0] timed_set_addr, input [31:0] timed_set_data, + input [31:0] sample_in_tdata, + input sample_in_tvalid, + input sample_in_tlast, + output sample_in_tready, + input sample_in_tuser, + input sample_in_eob, + output [31:0] sample_out_tdata, + output sample_out_tvalid, + input sample_out_tready, + output sample_out_tlast +); + + localparam cwidth = 25; + localparam zwidth = 24; + + wire [31:0] sr_phase_inc, sr_phase_inc_timed_tdata; + wire sr_phase_inc_valid, sr_phase_inc_timed_tvalid, sr_phase_inc_timed_tready, sr_phase_inc_timed_tlast; + reg [31:0] phase_inc; + reg [31:0] phase; + reg phase_inc_valid; + + wire [SAMPLE_WIDTH*2-1:0] dds_in_tdata; + wire dds_in_tlast; + wire dds_in_tvalid; + wire dds_in_tready; + wire [SAMPLE_WIDTH*2-1:0] dds_in_fifo_tdata; + wire dds_in_fifo_tlast; + wire dds_in_fifo_tvalid; + wire dds_in_fifo_tready; + wire [WIDTH-1:0] dds_in_i_tdata; + wire [WIDTH-1:0] dds_in_q_tdata; + wire [WIDTH-1:0] dds_out_i_tdata; + wire [WIDTH-1:0] dds_out_q_tdata; + + wire [SAMPLE_WIDTH*2-1:0] 
dds_in_sync_tdata; + wire dds_in_sync_tvalid, dds_in_sync_tready, dds_in_sync_tlast; + wire [WIDTH-1:0] phase_sync_tdata; + wire phase_sync_tvalid, phase_sync_tready, phase_sync_tlast; + + wire [WIDTH-1:0] phase_tdata = phase[31:32-WIDTH]; + wire phase_tvalid, phase_tready, phase_tlast; + wire dds_out_tlast; + wire dds_out_tvalid; + wire [15:0] dds_input_fifo_space, dds_input_fifo_occupied; + + wire [17:0] scale_factor; + wire last_cic; + wire last_cic_decimate_in; + wire strobe_dds_clip; + wire [WIDTH-1:0] i_dds_clip, q_dds_clip; + wire [WIDTH-1:0] i_cic, q_cic; + wire [46:0] i_hb1, q_hb1; + wire [46:0] i_hb2, q_hb2; + wire [47:0] i_hb3, q_hb3; + wire sample_out_stb; + + wire strobe_cic, strobe_hb1, strobe_hb2, strobe_hb3; + wire ddc_chain_tready; + + reg [7:0] cic_decim_rate; + wire [7:0] cic_decim_rate_int; + wire rate_changed; + + wire [SAMPLE_WIDTH-1:0] sample_in_i = {sample_in_tdata[31:16]}; + wire [SAMPLE_WIDTH-1:0] sample_in_q = {sample_in_tdata[15:0]}; + + wire sample_mux_tready; + wire sample_mux_set_freq; + wire [SAMPLE_WIDTH-1:0] sample_mux_i, sample_mux_q; + wire realmode; + wire swap_iq; + + reg [1:0] hb_rate; + wire [1:0] hb_rate_int; + wire [2:0] enable_hb = { hb_rate == 2'b11, hb_rate[1] == 1'b1, hb_rate != 2'b00 }; + + wire reload_go, reload_we1, reload_we2, reload_we3, reload_ld1, reload_ld2, reload_ld3; + wire [17:0] coef_din; + + //phase incr settings regs and mux. 
+ setting_reg #(.my_addr(SR_FREQ_ADDR)) set_freq ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(sr_phase_inc),.changed(sr_phase_inc_valid)); + + assign sr_phase_inc_timed_tready = sample_in_tvalid & sample_in_tready & sample_mux_set_freq; + + axi_setting_reg #( + .ADDR(SR_FREQ_ADDR), + .USE_FIFO(1), + .FIFO_SIZE(5)) + set_freq_timed ( + .clk(clk), .reset(reset), .error_stb(), + .set_stb(timed_set_stb), .set_addr(timed_set_addr), .set_data(timed_set_data), + .o_tdata(sr_phase_inc_timed_tdata), .o_tlast(sr_phase_inc_timed_tlast), .o_tvalid(sr_phase_inc_timed_tvalid), + .o_tready(sr_phase_inc_timed_tready)); + + // Load phase increment depending on whether or not the settings bus write is + // a timed command. Non-timed commands get priority. + always @(posedge clk) begin + if (reset) begin + phase_inc <= 'd0; + phase_inc_valid <= 'd0; + end else begin + if (sr_phase_inc_valid) begin + phase_inc <= sr_phase_inc; + phase_inc_valid <= sr_phase_inc_valid; + end else if (sr_phase_inc_timed_tvalid & sr_phase_inc_timed_tready) begin + phase_inc <= sr_phase_inc_timed_tdata; + phase_inc_valid <= sr_phase_inc_timed_tvalid; + end else + phase_inc_valid <= 1'b0; + end + end + + setting_reg #(.my_addr(SR_SCALE_IQ_ADDR), .width(18)) set_scale_iq ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(scale_factor),.changed()); + + setting_reg #(.my_addr(SR_DECIM_ADDR), .width(10), .at_reset(1 /* No decimation */)) set_decim ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({hb_rate_int, cic_decim_rate_int}),.changed(rate_changed)); + + setting_reg #(.my_addr(SR_MUX_ADDR), .width(2)) set_mux ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({realmode,swap_iq}),.changed()); + + setting_reg #(.my_addr(SR_COEFFS_ADDR), .width(24)) set_coeffs ( + .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr), + 
.in(set_data),.out({reload_ld3,reload_we3,reload_ld2,reload_we2,reload_ld1,reload_we1,coef_din}),.changed(reload_go)); + + // Prevent changing rate while processing samples as this + // will corrupt the output + reg active, rate_changed_hold, rate_changed_stb; + always @(posedge clk) begin + if (reset) begin + active <= 1'b0; + rate_changed_hold <= 1'b0; + rate_changed_stb <= 1'b0; + cic_decim_rate <= 'd1; + hb_rate <= 'd0; + end else begin + if (clear) begin + active <= 1'b0; + end else if (sample_in_tvalid & sample_in_tready) begin + active <= 1'b1; + end + if (rate_changed & active) begin + rate_changed_hold <= 1'b1; + end + if ((clear | ~active) & (rate_changed | rate_changed_hold)) begin + rate_changed_hold <= 1'b0; + rate_changed_stb <= 1'b1; + cic_decim_rate <= cic_decim_rate_int; + hb_rate <= hb_rate_int; + end else begin + rate_changed_stb <= 1'b0; + end + end + end + + + //doesn't need to be registered and now can have back pressure from dds + assign sample_mux_set_freq = sample_in_tuser; + assign sample_mux_i = swap_iq ? sample_in_q : sample_in_i; + assign sample_mux_q = realmode ? 'd0 : (swap_iq ? sample_in_i : sample_in_q); + + /** Phase accumulator, Xilinx DDS/Complex Mult **/ + + //connect samples to dds + assign dds_in_tdata = {sample_mux_i,sample_mux_q}; + assign dds_in_tvalid = sample_in_tvalid & ddc_chain_tready; //if the rest of the chain isn't ready, then halt all data flow. this should help with rate changes... + assign dds_in_tlast = sample_in_tlast; + assign sample_in_tready = dds_in_tready & ddc_chain_tready; + + assign phase_tvalid = dds_in_tvalid; + assign phase_tlast = dds_in_tlast; + + // NCO + always @(posedge clk) begin + if (reset | clear | (phase_inc_valid & sr_phase_inc_timed_tready) | sample_in_eob) begin + phase <= 0; + end else if (dds_in_tvalid & dds_in_tready) begin //only increment phase when data is ready + phase <= phase + phase_inc; + end + end + + // Sync the two path's pipeline delay. 
+ // This is needed to ensure that applying the phase update happens on the + // correct sample regardless of differing downstream path delays. + axi_sync #( + .SIZE(2), + .WIDTH_VEC({WIDTH,2*SAMPLE_WIDTH}), // Vector of widths, each width is defined by a 32-bit value + .FIFO_SIZE(0)) + axi_sync ( + .clk(clk), .reset(reset), .clear(clear), + .i_tdata({phase_tdata,dds_in_tdata}), + .i_tlast({phase_tlast,dds_in_tlast}), + .i_tvalid({phase_tvalid,dds_in_tvalid}), + .i_tready({phase_tready,dds_in_tready}), + .o_tdata({phase_sync_tdata,dds_in_sync_tdata}), + .o_tlast({phase_sync_tlast,dds_in_sync_tlast}), + .o_tvalid({phase_sync_tvalid,dds_in_sync_tvalid}), + .o_tready({phase_sync_tready,dds_in_sync_tready})); + + //hold data to align with dds pipelining + axi_fifo #(.WIDTH(2*SAMPLE_WIDTH+1), .SIZE(5)) dds_input_fifo + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({dds_in_sync_tlast,dds_in_sync_tdata}), .i_tvalid(dds_in_sync_tvalid), .i_tready(dds_in_sync_tready), + .o_tdata({dds_in_fifo_tlast,dds_in_fifo_tdata}), .o_tvalid(dds_in_fifo_tvalid), .o_tready(dds_in_fifo_tready), + .space(dds_input_fifo_space), .occupied(dds_input_fifo_occupied) + ); + + // after fifo, do q quick sign extend op to get up to 24 bits. to match how the dds deals with the data path. 
+ // add extra bits to fit the dds width, 5 bits added here + sign_extend #( + .bits_in(SAMPLE_WIDTH), .bits_out(WIDTH)) + sign_extend_dds_i ( + .in({dds_in_fifo_tdata[2*SAMPLE_WIDTH-1:SAMPLE_WIDTH]}), .out(dds_in_i_tdata)); + + sign_extend #( + .bits_in(SAMPLE_WIDTH), .bits_out(WIDTH)) + sign_extend_dds_q ( + .in({dds_in_fifo_tdata[SAMPLE_WIDTH-1:0]}), .out(dds_in_q_tdata)); + + + dds_freq_tune dds_freq_tune_inst ( + .clk(clk), + .reset(reset | clear), + .eob(sample_in_eob), + .rate_changed(rate_changed_hold), + .dds_input_fifo_occupied(dds_input_fifo_occupied), + /* IQ input */ + .s_axis_din_tlast(dds_in_fifo_tlast), + .s_axis_din_tvalid(dds_in_fifo_tvalid), + .s_axis_din_tready(dds_in_fifo_tready), + .s_axis_din_tdata({dds_in_q_tdata, dds_in_i_tdata}), //48 = WIDTH*2 + /* Phase input from NCO */ + .s_axis_phase_tvalid(phase_sync_tvalid), + .s_axis_phase_tready(phase_sync_tready), // used in the axi_sync + .s_axis_phase_tlast(phase_sync_tlast), + .s_axis_phase_tdata(phase_sync_tdata), //24 bit = WIDTH + /* IQ output */ + .m_axis_dout_tlast(dds_out_tlast), + .m_axis_dout_tvalid(dds_out_tvalid), + .m_axis_dout_tready(ddc_chain_tready), + .m_axis_dout_tdata({dds_out_q_tdata, dds_out_i_tdata}) + + ); + + //48 = WIDTH*2 + //chop off top byte because it's not actually used and we want to match expected gain/bit use found in freq shift + assign i_dds_clip = {dds_out_i_tdata[15:0],8'h00}; + assign q_dds_clip = {dds_out_q_tdata[15:0],8'h00}; + assign strobe_dds_clip = dds_out_tvalid & sample_out_tready; + assign last_cic_decimate_in = dds_out_tlast; + + /** CIC DECIMATE **/ + cic_decimate #(.WIDTH(WIDTH), .N(4), .MAX_RATE(CIC_MAX_DECIM)) cic_decimate_i ( + .clk(clk), .reset(reset | clear), + .rate_stb(rate_changed_stb), .rate(cic_decim_rate), .strobe_in(strobe_dds_clip), .strobe_out(strobe_cic), + .last_in(last_cic_decimate_in), .last_out(last_cic), .signal_in(i_dds_clip), .signal_out(i_cic)); + + cic_decimate #(.WIDTH(WIDTH), .N(4), .MAX_RATE(CIC_MAX_DECIM)) 
cic_decimate_q ( + .clk(clk), .reset(reset | clear), + .rate_stb(rate_changed_stb), .rate(cic_decim_rate), .strobe_in(strobe_dds_clip), .strobe_out(), + .last_in(1'b0), .last_out(), .signal_in(q_dds_clip), .signal_out(q_cic)); + + // Halfbands + wire nd1, nd2, nd3; + wire rfd1, rfd2, rfd3; + wire rdy1, rdy2, rdy3; + wire data_valid1, data_valid2, data_valid3; + + localparam HB1_SCALE = 18; + localparam HB2_SCALE = 18; + localparam HB3_SCALE = 18; + + // Track last sample as it propagates through the half band filters + // Note: Delays calibrated for specific pipeline delay in each hb filter + reg [5:0] hb1_in_cnt, hb2_in_cnt, hb3_in_cnt; + reg [4:0] hb1_out_cnt, hb2_out_cnt, hb3_out_cnt; + reg [4:0] hb1_last_cnt, hb2_last_cnt, hb3_last_cnt; + reg hb1_last_set, hb2_last_set, hb3_last_set; + reg last_hb1, last_hb2, last_hb3; + always @(posedge clk) begin + if (reset | clear) begin + hb1_in_cnt <= 'd0; + hb2_in_cnt <= 'd0; + hb3_in_cnt <= 'd0; + hb1_out_cnt <= 'd0; + hb2_out_cnt <= 'd0; + hb3_out_cnt <= 'd0; + hb1_last_cnt <= 'd0; + hb2_last_cnt <= 'd0; + hb3_last_cnt <= 'd0; + hb1_last_set <= 1'b0; + hb2_last_set <= 1'b0; + hb3_last_set <= 1'b0; + last_hb1 <= 1'b0; + last_hb2 <= 1'b0; + last_hb3 <= 1'b0; + end else begin + // HB1 + if (strobe_cic & rfd1) begin + hb1_in_cnt <= hb1_in_cnt + 1'b1; + if (last_cic) begin + hb1_last_set <= 1'b1; + hb1_last_cnt <= hb1_in_cnt[5:1]; + end + end + if (strobe_hb1) begin + hb1_out_cnt <= hb1_out_cnt + 1'b1; + end + // Avoid subtracting 1 from hb1_last_cnt by initializing hb1_out_cnt = 1 + if (hb1_last_set & (hb1_out_cnt == hb1_last_cnt)) begin + last_hb1 <= 1'b1; + hb1_last_set <= 1'b0; + hb1_last_cnt <= 'd0; + end else if (last_hb1 & strobe_hb1 & rfd2) begin + last_hb1 <= 1'b0; + end + // HB2 + if (strobe_hb1 & rfd2) begin + hb2_in_cnt <= hb2_in_cnt + 1'b1; + if (last_hb1) begin + hb2_last_set <= 1'b1; + hb2_last_cnt <= hb2_in_cnt[5:1]; + end + end + if (strobe_hb2) begin + hb2_out_cnt <= hb2_out_cnt + 1'b1; + end + if 
(hb2_last_set & (hb2_out_cnt == hb2_last_cnt)) begin + last_hb2 <= 1'b1; + hb2_last_set <= 1'b0; + hb2_last_cnt <= 'd0; + end else if (last_hb2 & strobe_hb2 & rfd3) begin + last_hb2 <= 1'b0; + end + // HB3 + if (strobe_hb2 & rfd3) begin + hb3_in_cnt <= hb3_in_cnt + 1'b1; + if (last_hb2) begin + hb3_last_set <= 1'b1; + hb3_last_cnt <= hb3_in_cnt[5:1]; + end + end + if (strobe_hb3) begin + hb3_out_cnt <= hb3_out_cnt + 1'b1; + end + if (hb3_last_set & (hb3_out_cnt == hb3_last_cnt)) begin + last_hb3 <= 1'b1; + hb3_last_set <= 1'b0; + hb3_last_cnt <= 'd0; + end else if (last_hb3 & strobe_hb3) begin + last_hb3 <= 1'b0; + end + end + end + + // Each filter will accept N-1 samples before outputting + // a sample. This logic "preloads" the pipeline with 0s + // so the first sample in pushes out a sample. + reg [5:0] hb1_cnt, hb2_cnt, hb3_cnt; + reg hb1_en, hb2_en, hb3_en, hb1_rdy, hb2_rdy, hb3_rdy; + generate + if (PRELOAD_HBS) begin + always @(posedge clk) begin + if (reset | clear) begin + hb1_cnt <= 0; + hb2_cnt <= 0; + hb3_cnt <= 0; + hb1_en <= 1'b1; + hb2_en <= 1'b1; + hb3_en <= 1'b1; + hb1_rdy <= 1'b0; + hb2_rdy <= 1'b0; + hb3_rdy <= 1'b0; + end else begin + if (hb1_en & rfd1) begin + if (hb1_cnt < 47) begin + hb1_cnt <= hb1_cnt + 1; + end else begin + hb1_en <= 1'b0; + end + end + if (data_valid1) begin + hb1_rdy <= 1'b1; + end + if (hb2_en & rfd2) begin + if (hb2_cnt < 47) begin + hb2_cnt <= hb2_cnt + 1; + end else begin + hb2_en <= 1'b0; + end + end + if (data_valid2) begin + hb2_rdy <= 1'b1; + end + if (hb3_en & rfd3) begin + if (hb3_cnt < 63) begin + hb3_cnt <= hb3_cnt + 1; + end else begin + hb3_en <= 1'b0; + end + end + if (data_valid3) begin + hb3_rdy <= 1'b1; + end + end + end + end else begin + always @(*) begin + hb1_en <= 1'b0; + hb2_en <= 1'b0; + hb3_en <= 1'b0; + hb1_rdy <= 1'b1; + hb2_rdy <= 1'b1; + hb3_rdy <= 1'b1; + end + end + endgenerate + + assign ddc_chain_tready = sample_out_tready & hb1_rdy & hb2_rdy & hb3_rdy; + + assign strobe_hb1 = 
data_valid1 & hb1_rdy; + assign strobe_hb2 = data_valid2 & hb2_rdy; + assign strobe_hb3 = data_valid3 & hb3_rdy; + assign nd1 = strobe_cic | hb1_en; + assign nd2 = strobe_hb1 | hb2_en; + assign nd3 = strobe_hb2 | hb3_en; + generate //no point in using a for loop generate because each hb is different. + if( NUM_HB > 0) begin + hbdec1 hbdec1 ( + .clk(clk), // input clk + .sclr(reset | clear), // input sclr + .ce(1'b1), // input ce + .coef_ld(reload_go & reload_ld1), // input coef_ld + .coef_we(reload_go & reload_we1), // input coef_we + .coef_din(coef_din), // input [17 : 0] coef_din + .rfd(rfd1), // output rfd + .nd(nd1), // input nd + .din_1(i_cic), // input [23 : 0] din_1 + .din_2(q_cic), // input [23 : 0] din_2 + .rdy(rdy1), // output rdy + .data_valid(data_valid1), // output data_valid + .dout_1(i_hb1), // output [46 : 0] dout_1 + .dout_2(q_hb1)); // output [46 : 0] dout_2 + end else begin //if (NUM_HB <= 2) + assign rdy1 = 1'b1; + assign rfd1 = 1'b1; + assign data_valid1 = 1'b1; + assign i_hb1 = 'h0; + assign q_hb1 = 'h0; + end + if( NUM_HB > 1) begin + hbdec2 hbdec2 ( + .clk(clk), // input clk + .sclr(reset | clear), // input sclr + .ce(1'b1), // input ce + .coef_ld(reload_go & reload_ld2), // input coef_ld + .coef_we(reload_go & reload_we2), // input coef_we + .coef_din(coef_din), // input [17 : 0] coef_din + .rfd(rfd2), // output rfd + .nd(nd2), // input nd + .din_1(i_hb1[23+HB1_SCALE:HB1_SCALE]), // input [23 : 0] din_1 + .din_2(q_hb1[23+HB1_SCALE:HB1_SCALE]), // input [23 : 0] din_2 + .rdy(rdy2), // output rdy + .data_valid(data_valid2), // output data_valid + .dout_1(i_hb2), // output [46 : 0] dout_1 + .dout_2(q_hb2)); // output [46 : 0] dout_2 + end else begin //if (NUM_HB <= 2) + assign rdy2 = 1'b1; + assign rfd2 = 1'b1; + assign data_valid2 = 1'b1; + assign i_hb2 = 'h0; + assign q_hb2 = 'h0; + end + if( NUM_HB > 2) begin + hbdec3 hbdec3 ( + .clk(clk), // input clk + .sclr(reset | clear), // input sclr + .ce(1'b1), // input ce + .coef_ld(reload_go & 
reload_ld3), // input coef_ld + .coef_we(reload_go & reload_we3), // input coef_we + .coef_din(coef_din), // input [17 : 0] coef_din + .rfd(rfd3), // output rfd + .nd(nd3), // input nd + .din_1(i_hb2[23+HB2_SCALE:HB2_SCALE]), // input [23 : 0] din_1 + .din_2(q_hb2[23+HB2_SCALE:HB2_SCALE]), // input [23 : 0] din_2 + .rdy(rdy3), // output rdy + .data_valid(data_valid3), // output data_valid + .dout_1(i_hb3), // output [47 : 0] dout_1 + .dout_2(q_hb3)); // output [47 : 0] dout_2 + end else begin //if (NUM_HB <= 2) + assign rdy3 = 1'b1; + assign rfd3 = 1'b1; + assign data_valid3 = 1'b1; + assign i_hb3 = 'h0; + assign q_hb3 = 'h0; + end + endgenerate + reg [23:0] i_unscaled, q_unscaled; + reg strobe_unscaled; + reg last_unscaled; + //this state machine must be changed if the user wants 4 hbs + always @(posedge clk) begin + if (reset | clear) begin + i_unscaled <= 'd0; + q_unscaled <= 'd0; + last_unscaled <= 1'b0; + strobe_unscaled <= 1'b0; + end else begin + case(hb_rate) + 2'd0 : begin + last_unscaled <= last_cic; + strobe_unscaled <= strobe_cic; + i_unscaled <= i_cic[23:0]; + q_unscaled <= q_cic[23:0]; + end + 2'd1 : begin + last_unscaled <= last_hb1; + strobe_unscaled <= strobe_hb1; + i_unscaled <= i_hb1[23+HB1_SCALE:HB1_SCALE]; + q_unscaled <= q_hb1[23+HB1_SCALE:HB1_SCALE]; + end + 2'd2 : begin + last_unscaled <= last_hb2; + strobe_unscaled <= strobe_hb2; + i_unscaled <= i_hb2[23+HB2_SCALE:HB2_SCALE]; + q_unscaled <= q_hb2[23+HB2_SCALE:HB2_SCALE]; + end + 2'd3 : begin + last_unscaled <= last_hb3; + strobe_unscaled <= strobe_hb3; + i_unscaled <= i_hb3[23+HB3_SCALE:HB3_SCALE]; + q_unscaled <= q_hb3[23+HB3_SCALE:HB3_SCALE]; + end + endcase // case (hb_rate) + end + end + + wire [42:0] i_scaled, q_scaled; + wire [23:0] i_clip, q_clip; + reg strobe_scaled; + reg last_scaled; + wire strobe_clip; + reg [1:0] last_clip; + + MULT_MACRO #( + .DEVICE("7SERIES"), // Target Device: "VIRTEX5", "VIRTEX6", "SPARTAN6","7SERIES" + .LATENCY(1), // Desired clock cycle latency, 0-4 + 
.WIDTH_A(25), // Multiplier A-input bus width, 1-25 + .WIDTH_B(18)) // Multiplier B-input bus width, 1-18 + SCALE_I (.P(i_scaled), // Multiplier output bus, width determined by WIDTH_P parameter + .A({i_unscaled[23],i_unscaled}), // Multiplier input A bus, width determined by WIDTH_A parameter + .B(scale_factor), // Multiplier input B bus, width determined by WIDTH_B parameter + .CE(strobe_unscaled), // 1-bit active high input clock enable + .CLK(clk), // 1-bit positive edge clock input + .RST(reset | clear)); // 1-bit input active high reset + + MULT_MACRO #( + .DEVICE("7SERIES"), // Target Device: "VIRTEX5", "VIRTEX6", "SPARTAN6","7SERIES" + .LATENCY(1), // Desired clock cycle latency, 0-4 + .WIDTH_A(25), // Multiplier A-input bus width, 1-25 + .WIDTH_B(18)) // Multiplier B-input bus width, 1-18 + SCALE_Q (.P(q_scaled), // Multiplier output bus, width determined by WIDTH_P parameter + .A({q_unscaled[23],q_unscaled}), // Multiplier input A bus, width determined by WIDTH_A parameter + .B(scale_factor), // Multiplier input B bus, width determined by WIDTH_B parameter + .CE(strobe_unscaled), // 1-bit active high input clock enable + .CLK(clk), // 1-bit positive edge clock input + .RST(reset | clear)); // 1-bit input active high reset + + wire [31:0] sample_out; + reg sample_out_last; + + always @(posedge clk) begin + if (reset | clear) begin + strobe_scaled <= 1'b0; + last_scaled <= 1'b0; + last_clip <= 'd0; + sample_out_last <= 1'b0; + end else begin + strobe_scaled <= strobe_unscaled; + last_scaled <= last_unscaled; + last_clip[1:0] <= {last_clip[0], last_scaled}; + sample_out_last <= last_clip[1]; + end + end + + clip_reg #(.bits_in(29), .bits_out(24), .STROBED(1)) clip_i ( + .clk(clk), .reset(reset | clear), .in(i_scaled[42:14]), .strobe_in(strobe_scaled), .out(i_clip), .strobe_out(strobe_clip)); + clip_reg #(.bits_in(29), .bits_out(24), .STROBED(1)) clip_q ( + .clk(clk), .reset(reset | clear), .in(q_scaled[42:14]), .strobe_in(strobe_scaled), .out(q_clip), 
.strobe_out());
+
+  round_sd #(.WIDTH_IN(24), .WIDTH_OUT(16), .DISABLE_SD(1)) round_i (
+    .clk(clk), .reset(reset | clear), .in(i_clip), .strobe_in(strobe_clip), .out(sample_out[31:16]), .strobe_out(sample_out_stb));
+  round_sd #(.WIDTH_IN(24), .WIDTH_OUT(16), .DISABLE_SD(1)) round_q (
+    .clk(clk), .reset(reset | clear), .in(q_clip), .strobe_in(strobe_clip), .out(sample_out[15:0]), .strobe_out());
+
+  //FIFO_SIZE = 8 infers a bram fifo
+  strobed_to_axi #(
+    .WIDTH(32),
+    .FIFO_SIZE(8))
+  strobed_to_axi (
+    .clk(clk), .reset(reset), .clear(clear),
+    .in_stb(sample_out_stb), .in_data(sample_out), .in_last(sample_out_last),
+    .o_tdata(sample_out_tdata), .o_tlast(sample_out_tlast), .o_tvalid(sample_out_tvalid), .o_tready(sample_out_tready));
+
+endmodule // ddc_chain
diff --git a/fpga/usrp3/lib/rfnoc/dds_freq_tune.v b/fpga/usrp3/lib/rfnoc/dds_freq_tune.v
new file mode 100644
index 000000000..2491c01a1
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/dds_freq_tune.v
@@ -0,0 +1,208 @@
+//
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// DDS frequency shift with complex multiply: the phase stream drives a sin/cos LUT DDS whose output is
+// complex-multiplied with the IQ stream; an FSM flushes and resets the DDS when the phase stream stops.
+module dds_freq_tune #(
+  parameter WIDTH = 24,          // bits per component of s_axis_din_tdata (two components per word)
+  parameter PHASE_WIDTH = 24,    // bits of s_axis_phase_tdata
+  parameter SIN_COS_WIDTH = 16,  // bits per sin or cos word out of the DDS
+  parameter OUTPUT_WIDTH = 24    // bits per component of m_axis_dout_tdata (two components per word)
+)(
+  input clk,
+  input reset,
+  input eob,                              // while waiting for phase data, starts the flush immediately
+  input rate_changed,                     // while waiting for phase data, starts the flush immediately
+  input [15:0] dds_input_fifo_occupied,   // not referenced inside this module
+  /* IQ input */
+  input [WIDTH*2-1:0] s_axis_din_tdata,
+  input s_axis_din_tlast,
+  input s_axis_din_tvalid,
+  output s_axis_din_tready,
+  /* Phase input from NCO */
+  input [PHASE_WIDTH-1:0] s_axis_phase_tdata,
+  input s_axis_phase_tlast,
+  input s_axis_phase_tvalid,
+  output s_axis_phase_tready,             // DDS ready, forced low while phase_valid_hold is set
+  /* IQ output */
+  output [OUTPUT_WIDTH*2-1:0] m_axis_dout_tdata,
+  output m_axis_dout_tlast,
+  output m_axis_dout_tvalid,
+  input m_axis_dout_tready,
+
+  //debug signals (plain copies of internal state and of the DDS output stream)
+  output [2:0] state_out,
+  output phase_valid_hold_out,
+  output [7:0] phase_invalid_wait_count_out,
+  output reset_dds_out,
+  output m_axis_dds_tlast_out,
+  output m_axis_dds_tvalid_out,
+  output m_axis_dds_tready_out,
+  output [SIN_COS_WIDTH*2-1:0] m_axis_dds_tdata_out //[31:16] = sin|q [15:0] cos|i
+);
+
+  //wires for dds output
+  wire m_axis_dds_tlast;
+  wire m_axis_dds_tvalid;
+  wire m_axis_dds_tready;
+  wire [SIN_COS_WIDTH*2-1:0] m_axis_dds_tdata; //[31:16] = sin|q [15:0] cos|i
+  reg reset_reg;                        // reset delayed by one clock
+  reg phase_valid_hold;                 // forces the DDS phase input valid during HOLD_VALID
+  reg [7:0] phase_invalid_wait_count;   // WAIT-state timeout counter
+  reg [2:0] state;
+  reg reset_dds = 1'b1;     // Init DDS resets to 1, since simulation model
+  reg reset_dds_reg = 1'b1; // requires reset at time 0 to avoid failure.
+  wire s_axis_phase_tready_dds;
+
+  //when we're holding valid, make ready low so no new data comes in.
+  assign s_axis_phase_tready = s_axis_phase_tready_dds & ~phase_valid_hold;
+
+  localparam INIT = 3'b000;        // idle: hold, counter and DDS reset cleared; wait for phase data
+  localparam VALID = 3'b001;       // phase data is flowing
+  localparam WAIT = 3'b010;        // phase data stopped: resume on new data, else time out and flush
+  localparam HOLD_VALID = 3'b011;  // force phase valid until the IQ input goes idle, then reset the DDS
+
+  //reset needs to be 2 clk cycles minimum for Xilinx DDS IP
+  always @(posedge clk) begin
+    reset_reg <= reset;
+    reset_dds_reg <= reset_dds;
+  end
+
+  //logic to reset the dds when the phase data goes from valid to not valid;
+  //also holds valid high until the pipeline has passed tlast through.
+  always @(posedge clk) begin
+    if(reset) begin
+      state <= INIT;
+      phase_valid_hold <= 1'b0;
+      phase_invalid_wait_count <= 8'h00;
+      reset_dds <= 1'b0;
+    end
+    else begin
+      case(state)
+        INIT: begin//init case
+          phase_valid_hold <= 1'b0;
+          phase_invalid_wait_count <= 8'h00;
+          reset_dds <= 1'b0;
+          if(s_axis_phase_tvalid) begin
+            state <= VALID;
+          end
+        end
+        VALID: begin //valid data
+          if(~s_axis_phase_tvalid) begin
+            state <= WAIT;
+          end
+        end
+        WAIT: begin //wait until we either get valid data or don't
+          if(m_axis_dds_tready) begin //only increment when the downstream can accept data.
+            phase_invalid_wait_count <= phase_invalid_wait_count + 8'd1;
+          end
+          if(s_axis_phase_tvalid) begin //if we get valid data shortly after, then don't push data through and reset
+            state <= INIT;
+          end else begin
+            if(eob | (phase_invalid_wait_count >= 8'h40) | rate_changed ) begin //if a valid never comes, aka eob
+              state <= HOLD_VALID;
+            end
+          end
+        end
+        HOLD_VALID: begin//hold valid to finish pipeline. Apparently the dds IP won't empty without additional valids.
+          phase_valid_hold <= 1'b1;
+          // Wait for the IQ input (fed by the caller's input FIFO) to go idle
+          if (~s_axis_din_tvalid) begin
+            state <= INIT;
+            reset_dds <= 1'b1;
+          end
+        end
+        default: state <= INIT; // unused encodings 3'b100-3'b111: recover instead of hanging
+      endcase
+    end
+  end
+
+  //dds to generate sin/cos data from phase
+  dds_sin_cos_lut_only dds_inst (
+    .aclk(clk), // input wire aclk
+    .aresetn(~(reset | reset_reg | reset_dds | reset_dds_reg)), // input wire aresetn active low rst
+    .s_axis_phase_tvalid(s_axis_phase_tvalid | phase_valid_hold), // input wire s_axis_phase_tvalid
+    .s_axis_phase_tready(s_axis_phase_tready_dds), // output wire s_axis_phase_tready
+    .s_axis_phase_tlast(s_axis_phase_tlast), // input wire s_axis_phase_tlast
+    .s_axis_phase_tdata(s_axis_phase_tdata), // input wire [23 : 0] s_axis_phase_tdata
+    .m_axis_data_tvalid(m_axis_dds_tvalid), // output wire m_axis_data_tvalid
+    .m_axis_data_tready(m_axis_dds_tready), // input wire m_axis_data_tready
+    .m_axis_data_tlast(m_axis_dds_tlast), // output wire m_axis_data_tlast
+    .m_axis_data_tdata(m_axis_dds_tdata) // output wire [31 : 0] m_axis_data_tdata
+  );
+
+  wire [WIDTH*2-1:0] mult_in_a_tdata;
+  wire mult_in_a_tvalid;
+  wire mult_in_a_tready;
+  wire mult_in_a_tlast;
+  wire [SIN_COS_WIDTH*2-1:0] mult_in_b_tdata;
+  wire mult_in_b_tvalid;
+  wire mult_in_b_tready;
+  wire mult_in_b_tlast; //tlast of the DDS path, passed on to the multiplier's b input
+  wire [2*32-1:0] mult_out_tdata;
+  wire mult_out_tvalid;
+  wire mult_out_tready;
+  wire mult_out_tlast;
+
+  axi_sync #(
+    .SIZE(2),
+    .WIDTH_VEC({SIN_COS_WIDTH*2, WIDTH*2}),
+    .FIFO_SIZE(0))
+  axi_sync (
+    .clk(clk), .reset(reset), .clear(1'b0), // clear is not used here: tie it low rather than leave the input floating
+    .i_tdata({m_axis_dds_tdata,s_axis_din_tdata}),
+    .i_tlast({m_axis_dds_tlast,s_axis_din_tlast}),
+    .i_tvalid({m_axis_dds_tvalid,s_axis_din_tvalid}),
+    .i_tready({m_axis_dds_tready,s_axis_din_tready}),
+    .o_tdata({mult_in_b_tdata,mult_in_a_tdata}),
+    .o_tlast({mult_in_b_tlast,mult_in_a_tlast}),
+    .o_tvalid({mult_in_b_tvalid,mult_in_a_tvalid}),
+    .o_tready({mult_in_b_tready,mult_in_a_tready}));
+
+  //a = input i/q data stream 48 bit i/q lower bits i, upper bits q
+  //b = output of dds 32 bit cos/sin. lower cos, upper sin
+  complex_multiplier_dds complex_mult_inst (
+    .aclk(clk), // input wire aclk
+    .aresetn(~(reset | reset_reg)), // input wire aresetn
+    .s_axis_a_tvalid(mult_in_a_tvalid), // input wire s_axis_a_tvalid
+    .s_axis_a_tready(mult_in_a_tready), // output wire s_axis_a_tready
+    .s_axis_a_tlast(mult_in_a_tlast), // input wire s_axis_a_tlast
+    .s_axis_a_tdata({mult_in_a_tdata}), // input wire [47 : 0] s_axis_a_tdata
+    .s_axis_b_tvalid(mult_in_b_tvalid), // input wire s_axis_b_tvalid
+    .s_axis_b_tready(mult_in_b_tready), // output wire s_axis_b_tready
+    .s_axis_b_tlast(mult_in_b_tlast), // input wire s_axis_b_tlast
+    .s_axis_b_tdata(mult_in_b_tdata), // input wire [31 : 0] s_axis_b_tdata
+    .m_axis_dout_tvalid(mult_out_tvalid), // output wire m_axis_dout_tvalid
+    .m_axis_dout_tready(mult_out_tready), // input wire m_axis_dout_tready
+    .m_axis_dout_tlast(mult_out_tlast), // output wire m_axis_dout_tlast
+    .m_axis_dout_tdata(mult_out_tdata) // output wire [63 : 0] m_axis_dout_tdata
+  );
+
+  axi_round_complex #(
+    .WIDTH_IN(32),
+    .WIDTH_OUT(OUTPUT_WIDTH))
+  axi_round_complex_inst (
+    .clk(clk),
+    .reset(reset | reset_reg),
+    .i_tdata(mult_out_tdata),
+    .i_tlast(mult_out_tlast),
+    .i_tvalid(mult_out_tvalid),
+    .i_tready(mult_out_tready),
+    .o_tdata(m_axis_dout_tdata),
+    .o_tlast(m_axis_dout_tlast),
+    .o_tvalid(m_axis_dout_tvalid),
+    .o_tready(m_axis_dout_tready));
+
+  //debug
+  assign state_out = state;
+  assign phase_valid_hold_out = phase_valid_hold;
+  assign phase_invalid_wait_count_out = phase_invalid_wait_count;
+  assign reset_dds_out = reset_dds;
+  assign m_axis_dds_tlast_out = m_axis_dds_tlast;
+  assign m_axis_dds_tvalid_out = m_axis_dds_tvalid;
+  assign m_axis_dds_tready_out = m_axis_dds_tready;
+  assign m_axis_dds_tdata_out = m_axis_dds_tdata;
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/dds_timed.v b/fpga/usrp3/lib/rfnoc/dds_timed.v
new file mode 100644
index 000000000..fd03f6a23
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/dds_timed.v
@@ -0,0 +1,290 @@
+//
+// Copyright 2016 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// DDS that supports timed commands via the settings bus
+
+module dds_timed #(
+  parameter SR_FREQ_ADDR = 0,
+  parameter SR_SCALE_IQ_ADDR = 1,
+  parameter CMD_FIFO_SIZE = 5,
+  parameter WIDTH = 16,
+  parameter DDS_WIDTH = 24,
+  parameter PHASE_WIDTH = 24,
+  parameter PHASE_ACCUM_WIDTH = 32,
+  parameter SCALING_WIDTH = 18,
+  parameter HEADER_WIDTH = 128,
+  parameter HEADER_FIFO_SIZE = 5,
+  parameter SR_AWIDTH = 8,
+  parameter SR_DWIDTH = 32,
+  parameter SR_TWIDTH = 64
+)(
+  input clk, input reset, input clear,
+  output timed_cmd_fifo_full,
+  input set_stb, input [SR_AWIDTH-1:0] set_addr, input [SR_DWIDTH-1:0] set_data,
+  input [SR_TWIDTH-1:0] set_time, input set_has_time,
+  input [2*WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, input [HEADER_WIDTH-1:0] i_tuser,
+  output [2*WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready, output [HEADER_WIDTH-1:0] o_tuser
+);
+
+  /**************************************************************************
+   * Track VITA time
+   *************************************************************************/
+  wire [2*WIDTH-1:0] int_tdata;
+  wire [HEADER_WIDTH-1:0] int_tuser;
+  wire int_tlast, int_tvalid, int_tready, int_tag;
+  wire [SR_AWIDTH-1:0] out_set_addr, timed_set_addr;
+  wire [SR_DWIDTH-1:0] out_set_data, timed_set_data;
+  wire out_set_stb, timed_set_stb;
+  wire eob;
+
+  axi_tag_time #(
+    .WIDTH(2*WIDTH),
+    .NUM_TAGS(1),
+    .SR_TAG_ADDRS(SR_FREQ_ADDR))
+  axi_tag_time (
+    .clk(clk),
+    .reset(reset),
+    .clear(clear),
+    .tick_rate(16'd1),
+    .timed_cmd_fifo_full(timed_cmd_fifo_full),
+    .s_axis_data_tdata(i_tdata), .s_axis_data_tlast(i_tlast),
+    .s_axis_data_tvalid(i_tvalid), .s_axis_data_tready(i_tready),
+    .s_axis_data_tuser(i_tuser),
+    .m_axis_data_tdata(int_tdata), .m_axis_data_tlast(int_tlast),
+    .m_axis_data_tvalid(int_tvalid), .m_axis_data_tready(int_tready),
+    .m_axis_data_tuser(int_tuser), .m_axis_data_tag(int_tag),
+    .in_set_stb(set_stb), .in_set_addr(set_addr), .in_set_data(set_data),
+    .in_set_time(set_time), .in_set_has_time(set_has_time),
+    .out_set_stb(out_set_stb), .out_set_addr(out_set_addr), .out_set_data(out_set_data), // drives set_freq and set_scale below
+    .timed_set_stb(timed_set_stb), .timed_set_addr(timed_set_addr), .timed_set_data(timed_set_data)); // drives set_freq_timed below
+
+  wire [2*WIDTH-1:0] dds_in_tdata, unused_tdata;
+  wire [HEADER_WIDTH-1:0] header_in_tdata, header_out_tdata, unused_tuser;
+  wire dds_in_tlast, dds_in_tvalid, dds_in_tready, dds_in_tag;
+  wire header_in_tvalid, header_in_tready, header_in_tlast, unused_tag;
+  wire header_out_tvalid, header_out_tready;
+
+  split_stream #(
+    .WIDTH(2*WIDTH+HEADER_WIDTH+1), .ACTIVE_MASK(4'b0011))
+  split_head (
+    .clk(clk), .reset(reset), .clear(clear),
+    .i_tdata({int_tdata,int_tuser,int_tag}), .i_tlast(int_tlast),
+    .i_tvalid(int_tvalid), .i_tready(int_tready),
+    .o0_tdata({dds_in_tdata,unused_tuser,dds_in_tag}), .o0_tlast(dds_in_tlast), // output 0: samples + tag towards the DDS
+    .o0_tvalid(dds_in_tvalid), .o0_tready(dds_in_tready),
+    .o1_tdata({unused_tdata,header_in_tdata,unused_tag}), .o1_tlast(header_in_tlast), // output 1: header (tuser) towards the header FIFO
+    .o1_tvalid(header_in_tvalid), .o1_tready(header_in_tready),
+    .o2_tready(1'b0), .o3_tready(1'b0)); // outputs 2 and 3 are not used (ACTIVE_MASK = 4'b0011)
+
+  axi_fifo #(
+    .WIDTH(HEADER_WIDTH), .SIZE(HEADER_FIFO_SIZE))
+  axi_fifo_header (
+    .clk(clk), .reset(reset), .clear(clear),
+    .i_tdata(header_in_tdata), .i_tvalid(header_in_tvalid & header_in_tlast), .i_tready(header_in_tready), // header is written only on tlast
+    .o_tdata(header_out_tdata), .o_tvalid(header_out_tvalid),
+    .o_tready(header_out_tready), // Consume header on last output sample
+    .space(), .occupied());
+
+  assign eob = header_in_tdata[124]; // bit 124 of the incoming header; NOTE(review): presumably the CHDR EOB flag - confirm
+
+  /**************************************************************************
+   * Settings Regs
+   *************************************************************************/
+  wire [PHASE_ACCUM_WIDTH-1:0] phase_inc_tdata, phase_inc_timed_tdata;
+  wire phase_inc_tlast, phase_inc_tvalid, phase_inc_tready;
+  wire phase_inc_timed_tlast, phase_inc_timed_tready , phase_inc_timed_tvalid;
+
+  axi_setting_reg #(
+    .ADDR(SR_FREQ_ADDR), .AWIDTH(SR_AWIDTH), .WIDTH(PHASE_ACCUM_WIDTH), .STROBE_LAST(1))
+  set_freq (
+    .clk(clk), .reset(reset),
+    .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data),
+    .o_tdata(phase_inc_tdata), .o_tlast(phase_inc_tlast), .o_tvalid(phase_inc_tvalid), .o_tready(phase_inc_tready));
+
+  axi_setting_reg #(
+    .ADDR(SR_FREQ_ADDR), .USE_FIFO(1), .FIFO_SIZE(CMD_FIFO_SIZE), .AWIDTH(SR_AWIDTH), .WIDTH(PHASE_ACCUM_WIDTH), .STROBE_LAST(1))
+  set_freq_timed (
+    .clk(clk), .reset(reset),
+    .set_stb(timed_set_stb), .set_addr(timed_set_addr), .set_data(timed_set_data),
+    .o_tdata(phase_inc_timed_tdata), .o_tlast(phase_inc_timed_tlast), .o_tvalid(phase_inc_timed_tvalid), .o_tready(phase_inc_timed_tready));
+
+  wire [SCALING_WIDTH-1:0] scaling_tdata;
+  wire scaling_tvalid, scaling_tready;
+
+  axi_setting_reg #(
+    .ADDR(SR_SCALE_IQ_ADDR), .AWIDTH(SR_AWIDTH), .WIDTH(SCALING_WIDTH), .REPEATS(1))
+  set_scale (
+    .clk(clk), .reset(reset),
+    .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data),
+    .o_tdata(scaling_tdata), .o_tlast(), .o_tvalid(scaling_tvalid), .o_tready(scaling_tready));
+
+  /**************************************************************************
+   * DDS + Complex Mult + Phase Accumulator
+   *************************************************************************/
+  wire [PHASE_ACCUM_WIDTH-1:0] phase_inc_mux_tdata;
+  reg [PHASE_ACCUM_WIDTH-1:0] phase_inc; // phase increment currently applied by the NCO
+  wire phase_inc_mux_tlast, phase_inc_mux_tvalid, phase_inc_mux_tready;
+  reg [PHASE_ACCUM_WIDTH-1:0] phase; // NCO phase accumulator
+
+  wire [PHASE_WIDTH-1:0] phase_tdata = phase[PHASE_ACCUM_WIDTH-1:PHASE_ACCUM_WIDTH-PHASE_WIDTH]; // top PHASE_WIDTH bits of the accumulator
+  wire phase_tvalid, phase_tready, phase_tlast;
+
+  wire [WIDTH*2-1:0] dds_in_fifo_tdata;
+  wire dds_in_fifo_tvalid, dds_in_fifo_tready, dds_in_fifo_tlast;
+  wire dds_out_tlast, dds_out_tvalid, dds_out_tready;
+
+  wire [DDS_WIDTH-1:0] dds_in_i_tdata, dds_in_q_tdata;
+  wire [DDS_WIDTH-1:0] dds_out_i_tdata, dds_out_q_tdata;
+  wire [15:0] dds_input_fifo_space, dds_input_fifo_occupied;
+
+  wire [WIDTH*2-1:0] dds_in_sync_tdata;
+  wire dds_in_sync_tvalid, dds_in_sync_tready, dds_in_sync_tlast;
+  wire [PHASE_WIDTH-1:0] phase_sync_tdata;
+  wire phase_sync_tvalid, phase_sync_tready, phase_sync_tlast;
+
+  assign phase_inc_mux_tdata = phase_inc_timed_tready ? phase_inc_timed_tdata : phase_inc_tdata; // timed setting is selected while its tready is high
+  assign phase_inc_mux_tlast = phase_inc_timed_tready ? phase_inc_timed_tlast : phase_inc_tlast;
+  assign phase_inc_mux_tvalid = phase_inc_timed_tready ? phase_inc_timed_tvalid : phase_inc_tvalid;
+  assign phase_inc_tready = phase_inc_mux_tready;
+  assign phase_inc_timed_tready = phase_inc_mux_tready & dds_in_tag; // timed setting is consumed only on a tagged sample
+  assign phase_inc_mux_tready = phase_tready;
+
+  // phase is only valid when input i/q data stream is valid
+  assign phase_tvalid = dds_in_tvalid;
+  assign phase_tlast = dds_in_tlast;
+
+  always @(posedge clk) begin // latch the selected phase increment when it is transferred
+    if (reset | clear) begin
+      phase_inc <= 0;
+    end else if (phase_inc_mux_tvalid & phase_inc_mux_tready) begin
+      phase_inc <= phase_inc_mux_tdata;
+    end
+  end
+
+  // NCO, increment phase input to DDS SIN/COS LUT; phase restarts at 0 on reset, clear, a new phase increment, or eob
+  always @(posedge clk) begin
+    if (reset | clear | (phase_inc_mux_tvalid & phase_inc_mux_tready) | eob) begin
+      phase <= 0;
+    end else if (dds_in_tvalid & dds_in_tready) begin //only increment phase when data into dds is valid and data fifo is ready
+      phase <= phase + phase_inc;
+    end
+  end
+
+
+  // Sync the two paths' pipeline delay.
+  // This is needed to ensure that applying the phase update happens on the
+  // correct sample regardless of differing downstream path delays.
+  axi_sync #(
+    .SIZE(2),
+    .WIDTH_VEC({PHASE_WIDTH,2*WIDTH}), // Vector of widths, each width is defined by a 32-bit value
+    .FIFO_SIZE(0))
+  axi_sync (
+    .clk(clk), .reset(reset), .clear(clear),
+    .i_tdata({phase_tdata,dds_in_tdata}),
+    .i_tlast({phase_tlast,dds_in_tlast}),
+    .i_tvalid({phase_tvalid,dds_in_tvalid}),
+    .i_tready({phase_tready,dds_in_tready}),
+    .o_tdata({phase_sync_tdata,dds_in_sync_tdata}),
+    .o_tlast({phase_sync_tlast,dds_in_sync_tlast}),
+    .o_tvalid({phase_sync_tvalid,dds_in_sync_tvalid}),
+    .o_tready({phase_sync_tready,dds_in_sync_tready}));
+
+  // fifo to hold input data while pipeline catches up in dds
+  // this is blocked by the axi_sync following the dds
+  axi_fifo #(.WIDTH(2*WIDTH+1), .SIZE(5)) dds_input_fifo(
+    .clk(clk), .reset(reset), .clear(clear),
+    .i_tdata({dds_in_sync_tlast,dds_in_sync_tdata}), .i_tvalid(dds_in_sync_tvalid), .i_tready(dds_in_sync_tready), // tlast travels in the extra top data bit
+    .o_tdata({dds_in_fifo_tlast,dds_in_fifo_tdata}), .o_tvalid(dds_in_fifo_tvalid), .o_tready(dds_in_fifo_tready),
+    .space(dds_input_fifo_space), .occupied(dds_input_fifo_occupied)
+  );
+
+  // after the fifo, sign extend each WIDTH-bit component to DDS_WIDTH bits (24 by default) to match the DDS data path width.
+  sign_extend #(
+    .bits_in(WIDTH), .bits_out(DDS_WIDTH))
+  sign_extend_dds_i (
+    .in(dds_in_fifo_tdata[2*WIDTH-1:WIDTH]), .out(dds_in_i_tdata)); // upper half of the sample word
+
+  sign_extend #(
+    .bits_in(WIDTH), .bits_out(DDS_WIDTH))
+  sign_extend_dds_q (
+    .in(dds_in_fifo_tdata[WIDTH-1:0]), .out(dds_in_q_tdata)); // lower half of the sample word
+
+
+  // Wrapper for Xilinx IP AXI DDS + Complex Multiply
+  // NOTE: Seems Xilinx IP expects opposite I/Q combined complex data buses, so they are swapped here.
+  dds_freq_tune dds_freq_tune_inst (
+    .clk(clk),
+    .reset(reset | clear),
+    .eob(eob),
+    .rate_changed(1'b0), // tied off: this module never signals a rate change
+    .dds_input_fifo_occupied(dds_input_fifo_occupied),
+    /* IQ input */
+    .s_axis_din_tlast(dds_in_fifo_tlast),
+    .s_axis_din_tvalid(dds_in_fifo_tvalid),
+    .s_axis_din_tready(dds_in_fifo_tready),
+    .s_axis_din_tdata({dds_in_q_tdata, dds_in_i_tdata}),
+    /* Phase input from NCO */
+    .s_axis_phase_tlast(phase_sync_tlast),
+    .s_axis_phase_tvalid(phase_sync_tvalid),
+    .s_axis_phase_tready(phase_sync_tready),
+    .s_axis_phase_tdata(phase_sync_tdata), // PHASE_WIDTH bits (24 by default)
+    /* IQ output */
+    .m_axis_dout_tlast(dds_out_tlast),
+    .m_axis_dout_tvalid(dds_out_tvalid),
+    .m_axis_dout_tready(dds_out_tready),
+    .m_axis_dout_tdata({dds_out_q_tdata, dds_out_i_tdata})
+    // the debug outputs of dds_freq_tune are left unconnected
+  );
+  /************************************************************************
+   * Perform scaling on the IQ output
+   ************************************************************************/
+  wire [DDS_WIDTH+SCALING_WIDTH-1:0] scaled_i_tdata, scaled_q_tdata;
+  wire scaled_tlast, scaled_tvalid, scaled_tready;
+
+  mult #(
+    .WIDTH_A(DDS_WIDTH),
+    .WIDTH_B(SCALING_WIDTH),
+    .WIDTH_P(DDS_WIDTH+SCALING_WIDTH),
+    .DROP_TOP_P(4),
+    .LATENCY(3),
+    .CASCADE_OUT(0))
+  i_mult (
+    .clk(clk), .reset(reset | clear),
+    .a_tdata(dds_out_i_tdata), .a_tlast(dds_out_tlast), .a_tvalid(dds_out_tvalid), .a_tready(dds_out_tready), // i_mult alone drives the shared handshake
+    .b_tdata(scaling_tdata), .b_tlast(1'b0), .b_tvalid(dds_out_tvalid /* aligning scaling_tdata with dds_tdata */), .b_tready(scaling_tready),
+    .p_tdata(scaled_i_tdata), .p_tlast(scaled_tlast), .p_tvalid(scaled_tvalid), .p_tready(scaled_tready));
+
+  mult #(
+    .WIDTH_A(DDS_WIDTH),
+    .WIDTH_B(SCALING_WIDTH),
+    .WIDTH_P(DDS_WIDTH+SCALING_WIDTH),
+    .DROP_TOP_P(4),
+    .LATENCY(3),
+    .CASCADE_OUT(0))
+  q_mult (
+    .clk(clk), .reset(reset | clear),
+    .a_tdata(dds_out_q_tdata), .a_tlast(), .a_tvalid(dds_out_tvalid), .a_tready(), // tlast/tready of q_mult are left unconnected
+    .b_tdata(scaling_tdata), .b_tlast(1'b0), .b_tvalid(dds_out_tvalid /* aligning
scaling_tdata with dds_tdata */), .b_tready(), + .p_tdata(scaled_q_tdata), .p_tlast(), .p_tvalid(), .p_tready(scaled_tready)); + + wire [2*WIDTH-1:0] sample_tdata; + wire sample_tlast, sample_tvalid, sample_tready; + + axi_round_and_clip_complex #( + .WIDTH_IN(DDS_WIDTH+SCALING_WIDTH), .WIDTH_OUT(WIDTH), .CLIP_BITS(12)) + axi_round_and_clip_complex ( + .clk(clk), .reset(reset | clear), + .i_tdata({scaled_i_tdata, scaled_q_tdata}), .i_tlast(scaled_tlast), .i_tvalid(scaled_tvalid), .i_tready(scaled_tready), + .o_tdata(sample_tdata), .o_tlast(sample_tlast), .o_tvalid(sample_tvalid), .o_tready(sample_tready)); + + // Throttle output on last sample if header is not valid + assign header_out_tready = sample_tlast & sample_tvalid & o_tready; + assign sample_tready = (sample_tvalid & sample_tlast) ? (header_out_tvalid & o_tready) : o_tready; + assign o_tvalid = (sample_tvalid & sample_tlast) ? header_out_tvalid : sample_tvalid; + assign o_tlast = sample_tlast; + assign o_tdata = sample_tdata; + assign o_tuser = header_out_tdata; + +endmodule
diff --git a/fpga/usrp3/lib/rfnoc/delay_fifo.v b/fpga/usrp3/lib/rfnoc/delay_fifo.v new file mode 100644 index 000000000..ad13392c8 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/delay_fifo.v @@ -0,0 +1,41 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//

// delay_fifo: delays an AXI-Stream by `len` transfers using a sample FIFO.
//
// Until `len` transfers have been accepted the FIFO is only written and the
// output data is forced to zero; after that every transfer pushes one word
// and pops one word, so the output is the input from `len` transfers ago.
// tvalid, tready and tlast are passed straight through (tlast is NOT delayed).
//
// Parameters:
//   MAX_LEN - largest supported delay; sets the width of `len` and FIFO depth
//   WIDTH   - tdata width
//
// `len` must only change while `clear` (or `reset`) is asserted, because the
// fill counter is compared for equality against it.
module delay_fifo
  #(parameter MAX_LEN=1023,
    parameter WIDTH=16)
   (input clk, input reset, input clear,
    input [$clog2(MAX_LEN+1)-1:0] len,
    input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
    output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

   // Width of the fill counter / log2 depth of the FIFO.
   localparam CNT_W = $clog2(MAX_LEN+1);

   reg  [CNT_W-1:0] fill_cnt;                    // words written while priming
   wire             primed  = (fill_cnt == len); // FIFO holds exactly `len` words
   wire             beat    = i_tvalid & o_tready; // one stream transfer
   wire [WIDTH-1:0] delayed;                     // word at the FIFO head

   // Every transfer is written; reads start only once the FIFO is primed.
   // The FIFO's own handshake outputs are intentionally left unconnected.
   axi_fifo #(.WIDTH(WIDTH), .SIZE(CNT_W)) sample_fifo
     (.clk(clk), .reset(reset), .clear(clear),
      .i_tdata(i_tdata), .i_tvalid(beat), .i_tready(),
      .o_tdata(delayed), .o_tvalid(), .o_tready(beat & primed));

   always @(posedge clk) begin
      if (reset | clear) begin
         fill_cnt <= 0;
      end else if (beat & ~primed) begin
         // FIXME careful if len changes during operation you must clear
         fill_cnt <= fill_cnt + 1;
      end
   end

   // Handshake and framing bypass the FIFO entirely.
   assign i_tready = o_tready;
   assign o_tvalid = i_tvalid;
   assign o_tlast  = i_tlast;

   // Zeros are emitted while the delay line is still filling.
   assign o_tdata  = primed ? delayed : {WIDTH{1'b0}};

endmodule // delay_fifo
diff --git a/fpga/usrp3/lib/rfnoc/delay_type2.v b/fpga/usrp3/lib/rfnoc/delay_type2.v new file mode 100644 index 000000000..6403f23b2 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/delay_type2.v @@ -0,0 +1,35 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// This delay doesn't use a fifo, and solves pipeline bubble issues.
// FIXME -- issues are that it will generate output without input, and you can't reduce delay, only increase
//
// Operation: until `len` filler words have been taken by the consumer, the
// module stalls its input and drives DELAY_VAL with o_tvalid forced high.
// Afterwards it is a plain wire-through of the input stream.
//
// NOTE(review): `clear` is an input but is not used below; only `reset`
// restarts the filler count.

module delay_type2
  #(parameter MAX_LEN_LOG2=10,
    parameter WIDTH=16,
    parameter DELAY_VAL=0)
   (input clk, input reset, input clear,
    input [MAX_LEN_LOG2-1:0] len,
    input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
    output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

   reg  [MAX_LEN_LOG2-1:0] filler_sent;               // filler words delivered so far
   wire                    inserting = (filler_sent < len); // still emitting filler

   always @(posedge clk) begin
      if (reset) begin
         filler_sent <= 0;
      end else if (inserting & o_tvalid & o_tready) begin
         filler_sent <= filler_sent + 1;
      end
   end

   // While inserting: hold off the upstream and present filler as valid data.
   assign i_tready = inserting ? 1'b0      : o_tready;
   assign o_tvalid = inserting ? 1'b1      : i_tvalid;
   assign o_tlast  = inserting ? 1'b0      : i_tlast;  // FIXME think about this more, no answer is perfect in all situations
   assign o_tdata  = inserting ? DELAY_VAL : i_tdata;

endmodule // delay_type2
diff --git a/fpga/usrp3/lib/rfnoc/delay_type3.v b/fpga/usrp3/lib/rfnoc/delay_type3.v new file mode 100644 index 000000000..6b2dfa1fd --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/delay_type3.v @@ -0,0 +1,32 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//

// delay_type3: an input FIFO followed by a delay_type2 stage.
//
// The FIFO carries {tlast, tdata} so framing travels with the data and can
// absorb input while delay_type2 stalls its upstream to insert filler.
//
// NOTE(review): parameter FIFOSIZE is not referenced; the FIFO is sized with
// MAX_LEN_LOG2. Kept for interface compatibility.

module delay_type3
  #(parameter FIFOSIZE=5,
    parameter MAX_LEN_LOG2=10,
    parameter WIDTH=16,
    parameter DELAY_VAL=0)
   (input clk, input reset, input clear,
    input [MAX_LEN_LOG2-1:0] len,
    input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
    output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

   // Stream between the FIFO and the delay stage.
   wire [WIDTH-1:0] buf_tdata;
   wire             buf_tlast;
   wire             buf_tvalid;
   wire             buf_tready;

   axi_fifo #(.WIDTH(WIDTH+1), .SIZE(MAX_LEN_LOG2)) sample_fifo
     (.clk(clk), .reset(reset), .clear(clear),
      .i_tdata({i_tlast, i_tdata}), .i_tvalid(i_tvalid), .i_tready(i_tready),
      .o_tdata({buf_tlast, buf_tdata}), .o_tvalid(buf_tvalid), .o_tready(buf_tready));

   delay_type2 #(.MAX_LEN_LOG2(MAX_LEN_LOG2), .WIDTH(WIDTH), .DELAY_VAL(DELAY_VAL)) delay
     (.clk(clk), .reset(reset), .clear(clear),
      .len(len),
      .i_tdata(buf_tdata), .i_tlast(buf_tlast), .i_tvalid(buf_tvalid), .i_tready(buf_tready),
      .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready));

endmodule // delay_type3
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/delay_type4.v b/fpga/usrp3/lib/rfnoc/delay_type4.v new file mode 100644 index 000000000..c1f3360ae --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/delay_type4.v @@ -0,0 +1,38 @@
//
// Copyright 2018 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// This delay doesn't use a fifo, and solves pipeline bubble issues.
// fixes some issues that seemed to occur with delay_type2:
// - o_tvalid is set to 0 when delay_done is 0
// - added the clear signal
// - i_tvalid is a combinational input to incrementing delay_count
//
// Operation: after reset/clear the module counts `len` cycles in which the
// upstream offers data (i_tvalid) and the downstream is ready (o_tready).
// During that time nothing is transferred on either side (i_tready and
// o_tvalid are both low). Once the count reaches `len` the module becomes a
// combinational pass-through of the input stream.

module delay_type4
  #(parameter MAX_LEN_LOG2=4,
    parameter WIDTH=16,
    parameter DELAY_VAL=0)
   (input clk, input reset, input clear,
    input [MAX_LEN_LOG2-1:0] len,
    input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
    output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

   reg  [MAX_LEN_LOG2-1:0] waited;                 // qualifying cycles seen so far
   wire                    holding = (waited < len); // delay still in progress

   always @(posedge clk) begin
      if (reset | clear) begin
         waited <= 0;
      end else if (holding & i_tvalid & o_tready) begin
         waited <= waited + 1;
      end
   end

   // While holding: both handshakes are blocked and idle values are driven.
   assign i_tready = holding ? 1'b0      : o_tready;
   assign o_tvalid = holding ? 1'b0      : i_tvalid;
   assign o_tlast  = holding ? 1'b0      : i_tlast;  // FIXME (carried over from delay_type2) think about this more, no answer is perfect in all situations
   assign o_tdata  = holding ? DELAY_VAL : i_tdata;

endmodule // delay_type4
diff --git a/fpga/usrp3/lib/rfnoc/duc.v b/fpga/usrp3/lib/rfnoc/duc.v new file mode 100644 index 000000000..7a4fcc602 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/duc.v @@ -0,0 +1,275 @@
//
// Copyright 2016 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//

//!
// RFNoC specific digital up-conversion chain
// High level block diagram:
//
// HB1 -> HB2 -> CIC -> DDS/multiplier -> Scaler
//
// Only the halfband / CIC interpolation and the final clip to 16 bits are
// implemented in this module; the frequency shift was moved to dds_timed.v
// (see the note at the end of the module).

// We don't care about framing here, hence no tlast
//
// Parameters:
//   SR_PHASE_INC_ADDR - settings address of the phase increment register
//   SR_SCALE_ADDR     - settings address of the scale factor register
//   SR_INTERP_ADDR    - settings address of the interpolation register,
//                       {hb_rate[1:0], cic_interp_rate[7:0]}
//   NUM_HB            - number of halfband filters instantiated (0..3)
//   CIC_MAX_INTERP    - maximum CIC interpolation rate
//
// NOTE(review): o_tuser is never driven and i_tuser is never read in this
// module, and the phase / scale settings registers have no consumer here.
// Confirm against the parent block before relying on them.

module duc #(
  parameter SR_PHASE_INC_ADDR = 0,
  parameter SR_SCALE_ADDR     = 1,
  parameter SR_INTERP_ADDR    = 2,
  parameter NUM_HB            = 2,
  parameter CIC_MAX_INTERP    = 128

)(
  input clk, input reset, input clear,
  input set_stb, input [7:0] set_addr, input [31:0] set_data,
  input [31:0] i_tdata, input [127:0] i_tuser, input i_tvalid, output i_tready,
  output [31:0] o_tdata, output [127:0] o_tuser, output o_tvalid, input o_tready
);

  localparam RESET_DELAY = 3; // Length (in cycles) of the stretched reset after a rate change

  localparam WIDTH  = 16; // Input/output bitwidth of the module
  localparam CWIDTH = 24; // Internal bitwidth needed for CORDIC accuracy
  localparam PWIDTH = 32; // Phase accumulator bitwidth

  reg [1:0] hb_rate;         // Current Halfband rate
  reg [7:0] cic_interp_rate; // Current CIC rate

  // Values last written to the interpolation settings register; they are
  // copied into hb_rate / cic_interp_rate only at a safe point (see below).
  wire [1:0] hb_rate_int;
  wire [7:0] cic_interp_rate_int;

  wire [2*CWIDTH-1:0] o_tdata_halfbands; // Halfband output
  wire o_tvalid_halfbands;

  wire rate_changed;         // Rate changed by the settings registers
  wire reset_on_change;      // Reset the halfbands and the cic everytime there is a rate change
  wire reset_on_live_change; // Reset when rate changes while streaming

  // Ready seen by the clip stage; it is the downstream ready gated off during
  // a rate change. Declared explicitly: it is referenced by axi_to_strobed
  // before its continuous assignment near the end of the module, and used to
  // exist only as an implicit net.
  wire i_tready_cartesian;

  wire [PWIDTH-1:0] o_tdata_phase;
  wire o_tvalid_phase;
  wire o_tlast_phase;
  wire i_tready_phase;

  wire [17:0] scale_factor;

  /**************************************************************************
  * Settings registers
  **************************************************************************/
  // AXI settings bus for phase values
  axi_setting_reg #(
    .ADDR(SR_PHASE_INC_ADDR), .AWIDTH(8), .WIDTH(PWIDTH), .STROBE_LAST(1), .REPEATS(1))
  axi_sr_phase (
    .clk(clk), .reset(reset),
    .set_stb(set_stb), .set_addr(set_addr), .set_data(set_data),
    .o_tdata(o_tdata_phase), .o_tlast(o_tlast_phase), .o_tvalid(o_tvalid_phase), .o_tready(i_tready_phase));

  // Settings register for scale
  setting_reg #(.my_addr(SR_SCALE_ADDR), .width(18)) sr_scale (
    .clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr),
    .in(set_data),.out(scale_factor),.changed());

  // Settings register for interpolation rate: bits [9:8] select the halfband
  // rate, bits [7:0] the CIC rate.
  setting_reg #(.my_addr(SR_INTERP_ADDR), .width(10), .at_reset(1)) sr_interp
    (.clk(clk),.rst(reset),.strobe(set_stb),.addr(set_addr),
     .in(set_data),.out({hb_rate_int,cic_interp_rate_int}),.changed(rate_changed));

  // Changing interpolation rates while processing only when axi_rate_change sends a clear
  //   active            - set by the first accepted input sample, cleared by
  //                       clear or by the stretched reset
  //   rate_changed_hold - remembers a rate write that arrived while active
  //   shift_reset       - shift register that stretches the one-cycle "apply
  //                       new rate" event into RESET_DELAY cycles of reset
  reg active, rate_changed_hold;
  reg [RESET_DELAY-1:0] shift_reset;
  always @(posedge clk) begin
    if (reset) begin
      active            <= 1'b0;
      rate_changed_hold <= 1'b0;
      cic_interp_rate   <= 'd1;
      hb_rate           <= 'd0;
      shift_reset       <= 'd0;
    end else begin
      if (clear | reset_on_change) begin
        active <= 1'b0;
      end else if (i_tready & i_tvalid) begin
        active <= 1'b1;
      end
      if (rate_changed & active) begin
        rate_changed_hold <= 1'b1;
      end
      if ((clear | ~active) & (rate_changed | rate_changed_hold)) begin
        rate_changed_hold <= 1'b0;
        cic_interp_rate   <= cic_interp_rate_int;
        hb_rate           <= hb_rate_int;
        // Shift in a 1. The select is RESET_DELAY-1 bits wide so the
        // concatenation matches the register width exactly (the previous
        // [RESET_DELAY-1:0] select relied on silent truncation of the MSB).
        shift_reset       <= {shift_reset[RESET_DELAY-2:0], 1'b1};
      end else begin
        shift_reset       <= {shift_reset[RESET_DELAY-2:0], 1'b0};
      end
    end
  end

  // Long reset for the halfbands
  assign reset_on_change = |shift_reset;
  assign reset_on_live_change = (clear | reset_on_change | (~active & rate_changed));

  /**************************************************************************
  * Halfbands
  *************************************************************************/

  // Sign extend from 16 to 24 bits to increase the accuracy from the frequency shifter
  wire [2*CWIDTH-1:0] o_tdata_extd;

  sign_extend #(.bits_in(WIDTH), .bits_out(CWIDTH)) sign_extend_in_i (
    .in(i_tdata[2*WIDTH-1:WIDTH]), .out(o_tdata_extd[2*CWIDTH-1:CWIDTH]));

  sign_extend #(.bits_in(WIDTH), .bits_out(CWIDTH)) sign_extend_in_q (
    .in(i_tdata[WIDTH-1:0]), .out(o_tdata_extd[CWIDTH-1:0]));

  // Halfband 1 wires
  wire i_tready_hb1;
  wire [2*CWIDTH-1:0] o_tdata_hb1;
  wire o_tvalid_hb1, o_tready_hb1;
  // Halfband 2 wires
  wire i_tready_hb2;
  wire [2*CWIDTH-1:0] o_tdata_hb2;
  wire o_tvalid_hb2, o_tready_hb2;
  // Halfband 3 wires
  wire i_tready_hb3;
  wire [2*CWIDTH-1:0] o_tdata_hb3;
  wire o_tvalid_hb3, o_tready_hb3;

  // The halfbands are chained hb1 -> hb2 -> hb3. Each I and Q half is shifted
  // left by 2 before it feeds the next stage. A stage that is not
  // instantiated drives constant zeros / not-ready.
  generate
    if( NUM_HB > 0 ) begin
      axi_hb47 halfband1 (
        .aclk(clk),
        .aresetn(~(reset | clear | reset_on_change)),
        .s_axis_data_tvalid(i_tvalid),
        .s_axis_data_tready(i_tready_hb1),
        .s_axis_data_tdata(o_tdata_extd),
        .m_axis_data_tvalid(o_tvalid_hb1),
        .m_axis_data_tready(o_tready_hb1),
        .m_axis_data_tdata(o_tdata_hb1)
      );
    end else begin
      assign o_tdata_hb1 = 'h0;
      assign o_tvalid_hb1 = 1'h0;
      assign i_tready_hb1 = 1'b0;
    end
    if( NUM_HB > 1 ) begin
      axi_hb47 halfband2 (
        .aclk(clk),
        .aresetn(~(reset | clear | reset_on_change)),
        .s_axis_data_tvalid(o_tvalid_hb1),
        .s_axis_data_tready(i_tready_hb2),
        .s_axis_data_tdata({o_tdata_hb1[2*CWIDTH-1:CWIDTH] << 2, o_tdata_hb1[CWIDTH-1:0] << 2}),
        .m_axis_data_tvalid(o_tvalid_hb2),
        .m_axis_data_tready(o_tready_hb2),
        .m_axis_data_tdata(o_tdata_hb2)
      );
    end else begin
      assign o_tdata_hb2 = 'h0;
      assign o_tvalid_hb2 = 1'h0;
      assign i_tready_hb2 = 1'b0;
    end
    if( NUM_HB > 2 ) begin
      axi_hb47 halfband3 (
        .aclk(clk),
        .aresetn(~(reset | clear | reset_on_change)),
        .s_axis_data_tvalid(o_tvalid_hb2),
        .s_axis_data_tready(i_tready_hb3),
        .s_axis_data_tdata({o_tdata_hb2[2*CWIDTH-1:CWIDTH] << 2, o_tdata_hb2[CWIDTH-1:0] << 2}),
        .m_axis_data_tvalid(o_tvalid_hb3),
        .m_axis_data_tready(o_tready_hb3),
        .m_axis_data_tdata(o_tdata_hb3)
      );
    end else begin
      assign o_tdata_hb3 = 'h0;
      assign o_tvalid_hb3 = 1'h0;
      assign i_tready_hb3 = 1'b0;
    end
  endgenerate

  /**************************************************************************
  * Halfband selection multiplexing
  *************************************************************************/
  wire [2*CWIDTH-1:0] o_tdata_cic;
  wire [2*CWIDTH-1:0] o_cic;
  wire o_tvalid_cic, i_tready_cic;
  wire o_tready_cic;

  // hb_rate selects the tap that feeds the CIC: 0 = bypass all halfbands,
  // 1 = after HB1, 2 = after HB2, 3 = after HB3.
  assign o_tdata_halfbands = (hb_rate == 2'b0) ? o_tdata_extd :
                             (hb_rate == 2'b1) ? {o_tdata_hb1[2*CWIDTH-1:CWIDTH] << 2, o_tdata_hb1[CWIDTH-1:0] << 2} :
                             (hb_rate == 2'b10) ? {o_tdata_hb2[2*CWIDTH-1:CWIDTH] << 2, o_tdata_hb2[CWIDTH-1:0] << 2} :
                             {o_tdata_hb3[2*CWIDTH-1:CWIDTH] << 2, o_tdata_hb3[CWIDTH-1:0] << 2};
  // Clearing valid on rate change as the halfbands take 2 cycles to clear
  assign o_tvalid_halfbands = reset_on_live_change ? 1'b0 :
                              (hb_rate == 2'b0) ? i_tvalid :
                              (hb_rate == 2'b1) ? o_tvalid_hb1 :
                              (hb_rate == 2'b10) ? o_tvalid_hb2 :
                              o_tvalid_hb3;
  // Throttle input data while rate change is going on
  assign i_tready = reset_on_live_change ? 1'b0 :
                    (hb_rate == 2'b0) ? i_tready_cic :
                    i_tready_hb1;
  // The selected (last active) halfband is back-pressured by the CIC input;
  // earlier halfbands are back-pressured by the next halfband in the chain.
  assign o_tready_hb1 = reset_on_live_change ? 1'b0 :
                        (hb_rate == 2'b1) ? i_tready_cic :
                        i_tready_hb2;
  assign o_tready_hb2 = reset_on_live_change ? 1'b0 :
                        (hb_rate == 2'b10) ? i_tready_cic :
                        i_tready_hb3;

  assign o_tready_hb3 = reset_on_live_change ? 1'b0 : i_tready_cic;

  /**************************************************************************
  * Ettus CIC; the Xilinx CIC has a minimum interpolation of 4,
  * so we use the strobed version and convert to and from AXI.
  *************************************************************************/
  wire to_cic_stb, from_cic_stb;
  wire [2*CWIDTH-1:0] to_cic_data;
  wire [CWIDTH-1:0] i_cic;
  wire [CWIDTH-1:0] q_cic;

  // Convert from AXI to strobed and back to AXI again for the CIC interpolation module
  axi_to_strobed #(.WIDTH(2*CWIDTH), .FIFO_SIZE(1), .MIN_RATE(128)) axi_to_strobed (
    .clk(clk), .reset(reset | reset_on_change), .clear(clear),
    .out_rate(cic_interp_rate), .ready(i_tready_cartesian & o_tready), .error(),
    .i_tdata(o_tdata_halfbands), .i_tvalid(o_tvalid_halfbands), .i_tlast(1'b0), .i_tready(i_tready_cic),
    .out_stb(to_cic_stb), .out_last(), .out_data(to_cic_data)
  );

  // I and Q run through identical CICs driven by the same input strobe; only
  // the I instance's output strobe is used for both.
  cic_interpolate #(.WIDTH(CWIDTH), .N(4), .MAX_RATE(CIC_MAX_INTERP)) cic_interpolate_i (
    .clk(clk), .reset(reset | clear | reset_on_change),
    .rate_stb(reset_on_change),
    .rate(cic_interp_rate), .strobe_in(to_cic_stb), .strobe_out(from_cic_stb),
    .signal_in(to_cic_data[2*CWIDTH-1:CWIDTH]), .signal_out(i_cic)
  );

  cic_interpolate #(.WIDTH(CWIDTH), .N(4), .MAX_RATE(CIC_MAX_INTERP)) cic_interpolate_q (
    .clk(clk), .reset(reset | clear | reset_on_change),
    .rate_stb(reset_on_change),
    .rate(cic_interp_rate), .strobe_in(to_cic_stb), .strobe_out(),
    .signal_in(to_cic_data[CWIDTH-1:0]), .signal_out(q_cic)
  );

  assign o_cic = {i_cic, q_cic};

  //FIFO_SIZE = 8 infers a bram fifo
  strobed_to_axi #(.WIDTH(2*CWIDTH), .FIFO_SIZE(8)) strobed_to_axi (
    .clk(clk), .reset(reset | reset_on_change), .clear(clear),
    .in_stb(from_cic_stb), .in_data(o_cic), .in_last(1'b0),
    .o_tdata(o_tdata_cic), .o_tvalid(o_tvalid_cic), .o_tlast(), .o_tready(o_tready_cic)
  );


  /**************************************************************************
  * Clip back to 16 bits
  *************************************************************************/
  wire o_tvalid_clip;

  axi_round_and_clip_complex #(
    .WIDTH_IN(CWIDTH), .WIDTH_OUT(WIDTH), .CLIP_BITS(CWIDTH-WIDTH)) // No rounding, all clip
  axi_round_and_clip_complex (
    .clk(clk), .reset(reset | clear | reset_on_change),
    .i_tdata(o_tdata_cic), .i_tlast(1'b0), .i_tvalid(o_tvalid_cic), .i_tready(o_tready_cic),
    .o_tdata(o_tdata), .o_tlast(), .o_tvalid(o_tvalid_clip), .o_tready(i_tready_cartesian));

  // Output handshake is suppressed in both directions during a rate change.
  assign o_tvalid = reset_on_live_change ? 1'b0 : o_tvalid_clip;
  assign i_tready_cartesian = reset_on_live_change ? 1'b0 : o_tready;

  // Note: To facilitate timed tunes, the code has been moved outside
  // the duc module to dds_timed.v.

endmodule // duc
diff --git a/fpga/usrp3/lib/rfnoc/fft_shift.v b/fpga/usrp3/lib/rfnoc/fft_shift.v new file mode 100644 index 000000000..453da8050 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fft_shift.v @@ -0,0 +1,198 @@
//
// Copyright 2014 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Arranges FFT output AXI stream packets so zero frequency bin is centered. Expects i_tuser to have FFT index.
// Intended to complement Xilinx Coregen AXI-stream FFT, but should work with any core with similar output.
// Works with natural and bit/digit reversed order.
//
// When using Xilinx FFT core, use bit/digit reversed order (versus natural order) to save resources
//
// Config bits:
// 0: Reverse output so positive frequencies are sent first
// 1: Bypass fft shift
//
// Parameters:
//   MAX_FFT_SIZE_LOG2 - log2 of the largest supported FFT size (RAM depth)
//   WIDTH             - sample (tdata) width
//
// Implementation: two RAMs ("ping" and "pong") are used alternately. One is
// written at address (i_tuser XOR fft_shift_mask) while the other is read out
// sequentially, so the re-ordering costs no throughput.

module fft_shift #(
  parameter MAX_FFT_SIZE_LOG2 = 11,
  parameter WIDTH = 32)
(
  input clk, input reset,
  input [1:0] config_tdata, input config_tvalid, output config_tready,
  input [$clog2(MAX_FFT_SIZE_LOG2+1)-1:0] fft_size_log2_tdata, input fft_size_log2_tvalid, output fft_size_log2_tready,
  input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, input [MAX_FFT_SIZE_LOG2-1:0] i_tuser,
  output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
);

  reg ping_pong;              // 1: loading ping / unloading pong, 0: the opposite
  reg loading_pkt;            // High while an input packet is partially received
  reg [2:0] reconfig_stall;   // Shift register holding off i_tready after a reconfiguration
  reg reverse, bypass;
  reg [$clog2(MAX_FFT_SIZE_LOG2+1)-1:0] fft_size_log2_reg;
  reg [MAX_FFT_SIZE_LOG2:0] fft_size;
  reg [MAX_FFT_SIZE_LOG2-1:0] fft_size_minus_1, fft_shift_mask;
  wire [WIDTH-1:0] ping_rd_data, pong_rd_data;
  reg [MAX_FFT_SIZE_LOG2-1:0] ping_rd_addr, pong_rd_addr;
  // t_user is the FFT index, this XOR is how the natural order FFT output is flipped to
  // center the zero frequency bin in the middle. This is essentially adding half the FFT length to
  // the write address without carrying, causing the upper half addresses to wrap around to the lower half
  // and vice versa.
  wire [MAX_FFT_SIZE_LOG2-1:0] ping_wr_addr = fft_shift_mask ^ i_tuser;
  wire [MAX_FFT_SIZE_LOG2-1:0] pong_wr_addr = fft_shift_mask ^ i_tuser;
  wire ping_wr_en = ping_pong ? i_tvalid & i_tready : 1'b0;
  wire pong_wr_en = ping_pong ? 1'b0 : i_tvalid & i_tready;
  // Always reads when loading ping/pong RAM so first word falls through. Avoids a bubble state.
  wire ping_rd_en = ping_pong ? 1'b1 : o_tvalid & o_tready;
  wire pong_rd_en = ping_pong ? o_tvalid & o_tready : 1'b1;
  reg ping_loaded, pong_loaded;
  // Only fill ping (or pong) RAM if it is empty and fft size has propagated through
  assign i_tready = (ping_pong ? ~ping_loaded : ~pong_loaded) & ~reconfig_stall[2];
  reg ping_tlast, pong_tlast;
  // Dump data in ping RAM (but only if it has been loaded!) while also loading in pong RAM and vice versa
  assign o_tvalid = ping_pong ? pong_loaded  : ping_loaded;
  assign o_tlast  = ping_pong ? pong_tlast   : ping_tlast;
  assign o_tdata  = ping_pong ? pong_rd_data : ping_rd_data;

  // Prevent reconfiguration from occurring except at valid times. If the user violates tvalid rules
  // (i.e. deasserts tvalid during the middle of a packet), could cause next output packet to have
  // the wrong size.
  assign config_tready = ~ping_loaded & ~pong_loaded & ~loading_pkt;
  assign fft_size_log2_tready = config_tready;

  ram_2port #(
    .DWIDTH(WIDTH),
    .AWIDTH(MAX_FFT_SIZE_LOG2))
  ping_ram_2port (
    .clka(clk),.ena(1'b1),.wea(ping_wr_en),.addra(ping_wr_addr),.dia(i_tdata),.doa(),
    .clkb(clk),.enb(ping_rd_en),.web(1'b0),.addrb(ping_rd_addr),.dib({WIDTH{1'b0}}),.dob(ping_rd_data));

  ram_2port #(
    .DWIDTH(WIDTH),
    .AWIDTH(MAX_FFT_SIZE_LOG2))
  pong_ram_2port (
    .clka(clk),.ena(1'b1),.wea(pong_wr_en),.addra(pong_wr_addr),.dia(i_tdata),.doa(),
    .clkb(clk),.enb(pong_rd_en),.web(1'b0),.addrb(pong_rd_addr),.dib({WIDTH{1'b0}}),.dob(pong_rd_data));

  always @(posedge clk) begin
    if (reset) begin
      ping_pong         <= 1'b1;
      ping_loaded       <= 1'b0;
      pong_loaded       <= 1'b0;
      ping_rd_addr      <= 0;
      pong_rd_addr      <= 0;
      ping_tlast        <= 1'b0;
      pong_tlast        <= 1'b0;
      fft_shift_mask    <= 0;
      fft_size_minus_1  <= 0;
      fft_size          <= 0;
      fft_size_log2_reg <= 0;
      bypass            <= 1'b0;
      reverse           <= 1'b0;
      reconfig_stall    <= 3'd0;
      loading_pkt       <= 1'b0;
    end else begin
      // Derived size values are registered; together with the mask below this
      // is the pipeline that reconfig_stall waits for.
      fft_size_minus_1  <= fft_size-1;
      fft_size          <= 1 << fft_size_log2_reg;
      // Configure FFT shift mask such that the output order is either
      // unaffected (bypass), positive frequencies first (reverse), or
      // negative frequencies first
      if (bypass) begin
        fft_shift_mask    <= 'd0;
      end else if (reverse) begin
        fft_shift_mask    <= (fft_size-1) >> 1;
      end else begin
        fft_shift_mask    <= fft_size >> 1;
      end

      // Restrict updating the configuration bits to valid times
      if (config_tready & config_tvalid) begin
        reverse           <= config_tdata[0];
        bypass            <= config_tdata[1];
        reconfig_stall    <= 3'b100;
      end
      // Restrict updating FFT size to valid times
      // Also, deassert i_tready until updated fft size has propagated through
      if (fft_size_log2_tready & fft_size_log2_tvalid) begin
        // Use the full port width. Slicing with $clog2(MAX_FFT_SIZE_LOG2)
        // dropped the MSB whenever MAX_FFT_SIZE_LOG2 is a power of two
        // (e.g. 8 or 16), making the maximum FFT size unselectable.
        fft_size_log2_reg <= fft_size_log2_tdata;
        reconfig_stall    <= 3'b111;
      end
      // No reconfiguration this cycle: shift the stall register towards zero
      if (~(config_tready & config_tvalid) & ~(fft_size_log2_tready & fft_size_log2_tvalid)) begin
        reconfig_stall[0]   <= 1'b0;
        reconfig_stall[2:1] <= reconfig_stall[1:0];
      end

      // Used to disable reconfiguration when we are receiving a packet
      if (i_tvalid & i_tready & ~i_tlast & ~loading_pkt) begin
        loading_pkt       <= 1'b1;
      end else if (i_tvalid & i_tready & i_tlast & loading_pkt) begin
        loading_pkt       <= 1'b0;
      end

      // Logic to simultaneously load ping RAM and unload pong RAM. Note, write address for ping RAM handled with i_tuser,
      // so we only look for i_tlast instead of maintaining a write address counter.
      if (ping_pong) begin
        // Unload pong RAM
        if (pong_loaded & o_tready & o_tvalid) begin
          // i.e. pong_rd_addr == fft_size-1, more efficient to use tlast
          if (pong_tlast) begin
            // Special case: ping RAM loaded before pong RAM emptied
            if (ping_loaded | (i_tvalid & i_tready & i_tlast)) begin
              ping_pong   <= ~ping_pong;
            end
            pong_tlast    <= 1'b0;
            pong_loaded   <= 1'b0;
            pong_rd_addr  <= 0;
          end else begin
            pong_rd_addr  <= pong_rd_addr + 1;
          end
          if (pong_rd_addr == fft_size_minus_1) begin
            pong_tlast    <= 1'b1;
          end
        end
        // Ping RAM done loading
        if (i_tvalid & i_tready & i_tlast) begin
          // Value at addr 0 already loaded (see first word fall through and avoiding a bubble state comment above)
          ping_rd_addr    <= 1;
          ping_loaded     <= 1'b1;
          // We can switch to the pong RAM only if it is empty (or about to be empty)
          if (~pong_loaded) begin
            ping_pong     <= ~ping_pong;
          end
        end
        // Special case: Ping and pong RAM loaded, wait until pong RAM unloaded.
        if (ping_loaded & (pong_loaded & o_tvalid & o_tlast)) begin
          ping_pong       <= ~ping_pong;
        end
      // Same as above, just ping / pong switched
      end else begin
        if (ping_loaded & o_tready & o_tvalid) begin
          if (ping_tlast) begin
            if (pong_loaded | (i_tvalid & i_tready & i_tlast)) begin
              ping_pong   <= ~ping_pong;
            end
            ping_tlast    <= 1'b0;
            ping_loaded   <= 1'b0;
            ping_rd_addr  <= 0;
          end else begin
            ping_rd_addr  <= ping_rd_addr + 1;
          end
          if (ping_rd_addr == fft_size_minus_1) begin
            ping_tlast    <= 1'b1;
          end
        end
        if (i_tvalid & i_tready & i_tlast) begin
          pong_rd_addr    <= 1;
          pong_loaded     <= 1'b1;
          if (~ping_loaded | (ping_loaded & o_tvalid & o_tlast)) begin
            ping_pong     <= ~ping_pong;
          end
        end
        if (pong_loaded & (ping_loaded & o_tvalid & o_tlast)) begin
          ping_pong       <= ~ping_pong;
        end
      end
    end
  end

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/file_sink.v b/fpga/usrp3/lib/rfnoc/file_sink.v new file mode 100644 index 000000000..3e4caaba1 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/file_sink.v @@ -0,0 +1,107 @@
//
// Copyright 2015 National Instruments
//

// file_sink: simulation-only stream sink that writes sample data to a file.
//
// The first 64-bit word of every packet is treated as a header and skipped;
// every following word (up to and including the one flagged by i_tlast) is
// written to FILENAME in binary after optional sample / byte swapping.
//
// Settings registers:
//   SR_SWAP_SAMPLES - 0: Do nothing, 1: 8-bit swap, 2: 16-bit, 3: 32-bit
//   SR_ENDIANNESS   - 0: Do nothing, 1: 16-bit boundary, 2: 32-bit

module file_sink #(
  parameter SR_SWAP_SAMPLES = 0,      // Address of the sample swap register
  parameter SR_ENDIANNESS = 1,        // Address of the endianness register
  // Default (after reset) values for the above settings registers, set for sc16
  parameter DEFAULT_SWAP_SAMPLES = 2,
  parameter DEFAULT_ENDIANNESS = 2,
  parameter FILENAME = "")
(
  input clk_i,
  input rst_i,

  input set_stb_i,
  input [7:0] set_addr_i,
  input [31:0] set_data_i,

  input [63:0] i_tdata,
  input i_tlast,
  input i_tvalid,
  output i_tready);

  integer file = 0;   // File descriptor; 0 means "not open"
  reg hdr = 1'b1;     // High when the next accepted word is a packet header

  wire [1:0] swap_samples;
  wire [1:0] endianness;

  wire [63:0] data_int;
  wire [63:0] data;

  setting_reg #(
    .my_addr(SR_SWAP_SAMPLES),
    .width(2),
    .at_reset(DEFAULT_SWAP_SAMPLES))
  sr_swap_samples (
    .clk(clk_i),
    .rst(rst_i),
    .strobe(set_stb_i),
    .addr(set_addr_i),
    .in(set_data_i),
    .out(swap_samples),
    .changed());

  setting_reg #(
    .my_addr(SR_ENDIANNESS),
    .width(2),
    .at_reset(DEFAULT_ENDIANNESS))
  sr_endianness (
    .clk(clk_i),
    .rst(rst_i),
    .strobe(set_stb_i),
    .addr(set_addr_i),
    .in(set_data_i),
    .out(endianness),
    .changed());

  // We're ready as soon as the file is open
  assign i_tready = (file == 0) ? 1'b0 : 1'b1;

  // A word is consumed only when both sides of the handshake agree.
  wire xfer = i_tvalid & i_tready;

  // Swap samples
  assign data_int = (swap_samples == 2'd0) ? i_tdata :
                    (swap_samples == 2'd1) ? {i_tdata[55:48], i_tdata[63:56], i_tdata[39:32], i_tdata[47:40],
                                              i_tdata[23:16], i_tdata[31:24], i_tdata[ 7: 0], i_tdata[15: 8]} :
                    (swap_samples == 2'd2) ? {i_tdata[47:32], i_tdata[63:48], i_tdata[15: 0], i_tdata[31:16]} :
                    (swap_samples == 2'd3) ? {i_tdata[31:0], i_tdata[63:32]} :
                    64'd0;

  // Swap endianness
  assign data = (endianness == 2'd0) ? data_int :
                (endianness == 2'd1) ? {data_int[47:32], data_int[63:48], data_int[15: 0], data_int[31:16]} :
                (endianness == 2'd2) ? {data_int[39:32], data_int[47:40], data_int[55:48], data_int[63:56],
                                        data_int[ 7: 0], data_int[15: 8], data_int[23:16], data_int[31:24]} :
                64'd0;

  initial begin
    if (FILENAME != "") begin
      file = $fopen(FILENAME, "wb");
      if(!file)
        $error("Could not open file sink.");
      else
        // Only announce readiness when the file was actually opened.
        $display("File sink ready.");
    end
  end

  always @(posedge clk_i) begin
    if(rst_i) begin
      hdr <= 1'b1;
    end
    else begin
      // Qualify everything with the full handshake. With the file open
      // i_tready is constant 1, so this matches the old behaviour; with no
      // file it prevents $fwrite on descriptor 0, and a stale i_tlast while
      // i_tvalid is low can no longer re-arm header skipping mid-packet.
      if(xfer) begin
        if(hdr) begin
          hdr <= 1'b0;
        end
        else begin
          $fwrite(file, "%u", data);
        end

        // Placed last so a single-word packet leaves hdr set.
        if(i_tlast) begin
          hdr <= 1'b1;
        end
      end
    end
  end

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/file_source.v b/fpga/usrp3/lib/rfnoc/file_source.v new file mode 100644 index 000000000..160cd9984 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/file_source.v @@ -0,0 +1,183 @@

// Copyright 2014, Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later

// Dummy data source.  Turn it on by setting a packet length in its setting reg, turn it off by setting 0.
// Will generate as fast as it can.

// file_source: simulation-only packet source that streams the contents of a
// hex memory file.
//
// FILENAME is loaded with $readmemh into a memory of FILE_LENGTH/8 64-bit
// words. While the enable register is set and the packet length register is
// non-zero, packets are produced back to back: one header word, an optional
// timestamp word (free-running cycle counter), then `len` data words read
// sequentially from the memory. The read index wraps at the end of the
// memory and is NOT restarted between packets.
//
// Settings registers:
//   SR_ENABLE       - 1 enables output (gates tvalid)
//   SR_PKT_LENGTH   - payload length in 64-bit words; 0 keeps the FSM idle
//   SR_RATE         - reload value of the line timer that paces tvalid
//   SR_SEND_TIME    - 1 inserts a timestamp word after the header
//   SR_SWAP_SAMPLES - 0: Do nothing, 1: 8-bit swap, 2: 16-bit, 3: 32-bit
//   SR_ENDIANNESS   - 0: Do nothing, 1: 16-bit boundary, 2: 32-bit
module file_source #(
  parameter SR_ENABLE = 0,
  parameter SR_PKT_LENGTH = 1,
  parameter SR_RATE = 2,
  parameter SR_SEND_TIME = 3,
  parameter SR_SWAP_SAMPLES = 4,
  parameter SR_ENDIANNESS = 5,
  // Default (after reset) values for above settings register set to sc16
  parameter DEFAULT_SWAP_SAMPLES = 2,
  parameter DEFAULT_ENDIANNESS = 2,
  parameter FILE_LENGTH = 65536, // Bytes
  parameter FILENAME="")
(
  input clk, input reset, input [31:0] sid,
  input set_stb, input [7:0] set_addr, input [31:0] set_data,
  output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready);

  // Number of 64-bit words in the backing memory.
  localparam MEM_DEPTH = FILE_LENGTH/8;

  reg [63:0] mem[0:MEM_DEPTH-1];
  reg [$clog2(MEM_DEPTH)-1:0] index;   // Next memory word to send

  initial begin
    if (FILENAME != "") begin
      $readmemh(FILENAME, mem);
    end
  end

  // `sid` is already declared by the ANSI port list; the former second
  // `wire [31:0] sid;` declaration was removed because redeclaring an ANSI
  // port is not legal Verilog.
  reg [11:0] seqnum;    // Sequence number placed in the header
  wire [15:0] rate;
  reg [1:0] state;

  wire [63:0] int_tdata;
  wire int_tlast, int_tvalid, int_tready;

  wire enable;
  wire [15:0] len;
  reg [15:0] count;     // Position within the current packet payload
  wire send_time;
  wire [1:0] swap_samples;
  wire [1:0] endianness;

  // NOTE(review): the instance is named sr_sid but holds the enable bit; the
  // name is kept so existing hierarchical references keep working.
  setting_reg #(.my_addr(SR_ENABLE), .width(1)) sr_sid (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(enable), .changed());

  setting_reg #(.my_addr(SR_PKT_LENGTH), .width(16)) sr_len (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(len), .changed());

  setting_reg #(.my_addr(SR_RATE), .width(16)) sr_rate (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(rate), .changed());

  setting_reg #(.my_addr(SR_SEND_TIME), .width(1)) sr_send_time (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(send_time), .changed());

  setting_reg #(
    .my_addr(SR_SWAP_SAMPLES), .width(2), .at_reset(DEFAULT_SWAP_SAMPLES))
  sr_swap_samples (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(swap_samples), .changed());

  setting_reg #(
    .my_addr(SR_ENDIANNESS), .width(2), .at_reset(DEFAULT_ENDIANNESS))
  sr_endianness (
    .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data),
    .out(endianness), .changed());

  localparam IDLE = 2'd0;
  localparam HEAD = 2'd1;
  localparam TIME = 2'd2;
  localparam DATA = 2'd3;

  // Packet FSM: IDLE -> HEAD -> [TIME] -> DATA (len words) -> IDLE
  always @(posedge clk) begin
    if(reset) begin
      state <= IDLE;
      count <= 0;
      index <= 0;
      seqnum <= 0;
    end else begin
      case (state)
        IDLE : begin
          if (len != 0) begin
            state <= HEAD;
          end
        end
        HEAD : begin
          if (int_tvalid & int_tready) begin
            count <= 1;
            seqnum <= seqnum + 1;
            if (send_time) begin
              state <= TIME;
            end else begin
              state <= DATA;
            end
          end
        end
        TIME : begin
          if (int_tvalid & int_tready) begin
            state <= DATA;
          end
        end
        DATA : begin
          if (int_tvalid & int_tready) begin
            // Wrap explicitly at the memory depth. For power-of-two depths
            // this equals the natural counter roll-over; for other depths it
            // keeps the read address inside the memory.
            index <= (index == MEM_DEPTH-1) ? 0 : index + 1;
            if (count == len) begin
              state <= IDLE;
              count <= 0;
            end else begin
              count <= count + 1;
            end
          end
        end
        default : state <= IDLE;
      endcase
    end
  end

  // Free-running cycle counter used as the timestamp word.
  reg [63:0] time_cnt;
  always @(posedge clk) begin
    if (reset) begin
      time_cnt <= 'd0;
    end else begin
      time_cnt <= time_cnt + 1;
    end
  end

  // Packet length in bytes: payload + header word (+ timestamp word).
  wire [15:0] pkt_len = { len[12:0], 3'b000 } + 16'd8 + (send_time ? 16'd8 : 16'd0);

  wire [63:0] data_int = mem[index];
  // Swap endianness
  wire [63:0] data = (endianness == 2'd0) ? data_int :
                     (endianness == 2'd1) ? {data_int[47:32], data_int[63:48], data_int[15: 0], data_int[31:16]} :
                     (endianness == 2'd2) ? {data_int[39:32], data_int[47:40], data_int[55:48], data_int[63:56],
                                             data_int[ 7: 0], data_int[15: 8], data_int[23:16], data_int[31:24]} :
                     64'd0;
  // Swap samples
  wire [63:0] data_out = (swap_samples == 2'd0) ? data :
                         (swap_samples == 2'd1) ? {data[55:48], data[63:56], data[39:32], data[47:40],
                                                   data[23:16], data[31:24], data[ 7: 0], data[15: 8]} :
                         (swap_samples == 2'd2) ? {data[47:32], data[63:48], data[15: 0], data[31:16]} :
                         (swap_samples == 2'd3) ? {data[31: 0], data[63:32]} :
                         64'd0;

  // Header word layout: {2'b00, has_time, 1'b0, seqnum[11:0], pkt_len[15:0], sid[31:0]}
  assign int_tdata = (state == HEAD) ? { 2'b00, send_time, 1'b0, seqnum, pkt_len, sid } :
                     (state == TIME) ? time_cnt : data_out;

  assign int_tlast = (count == len);

  // Pacing timer: reloads from `rate` whenever it reaches 0 or 1, otherwise
  // counts down. Output is only offered in cycles where it is 0 or 1.
  reg [15:0] line_timer;
  always @(posedge clk) begin
    if (reset) begin
      line_timer <= 0;
    end else begin
      if (line_timer == 0 || line_timer == 1) begin
        line_timer <= rate;
      end else begin
        line_timer <= line_timer - 1;
      end
    end
  end

  assign int_tvalid = enable & ((state==HEAD)|(state==DATA)|(state==TIME)) & (line_timer==0 || line_timer==1);

  // Packet gate between the generator and the module output.
  axi_packet_gate #(.WIDTH(64)) gate (
    .clk(clk), .reset(reset), .clear(1'b0),
    .i_tdata(int_tdata), .i_tlast(int_tlast), .i_terror(1'b0), .i_tvalid(int_tvalid), .i_tready(int_tready),
    .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready));

endmodule // file_source
diff --git a/fpga/usrp3/lib/rfnoc/fir_filter_slice.v b/fpga/usrp3/lib/rfnoc/fir_filter_slice.v new file mode 100644 index 000000000..3f83f54e3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fir_filter_slice.v @@ -0,0 +1,74 @@
//
// Copyright 2017 Ettus Research
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Multiply-accumulate with preadder for use as a computation unit
// in FIR filters. Designed to infer a DSP48 for all registers
// and arithmetic.
//
// Parameters:
//   IN_WIDTH    - Input width
//   COEFF_WIDTH - Coefficient width
//   ACCUM_WIDTH - Accumulator width
//   OUT_WIDTH   - Output width
//
// Pipeline (every sample-path register advances only when sample_in_stb is
// high):
//   sample_in_a -> a_reg[0] -> a_reg[1] --+
//                                         +--> ad_reg -> (* b_reg[1]) -> m_reg
//   sample_in_b -> d_reg -----------------+                               |
//                                          sample_accum + m_reg -> p_reg -+-> sample_out
//
// Coefficient path:
//   coeff_in -> b_reg[0] (only when coeff_load_stb) -> b_reg[1] (every clock)
//
module fir_filter_slice #(
  parameter IN_WIDTH    = 16,
  parameter COEFF_WIDTH = 16,
  parameter ACCUM_WIDTH = 32,
  parameter OUT_WIDTH   = 32)
(
  input clk,
  input reset,
  input clear,
  input sample_in_stb,
  input signed [IN_WIDTH-1:0] sample_in_a,       // Sample in
  input signed [IN_WIDTH-1:0] sample_in_b,       // Sample in for symmetric filters
  output signed [IN_WIDTH-1:0] sample_forward,   // Delayed sample in to forward
  input signed [COEFF_WIDTH-1:0] coeff_in,       // Filter tap coefficient
  output signed [COEFF_WIDTH-1:0] coeff_forward, // Filter tap coefficient to forward
  input coeff_load_stb,                          // Load coefficient
  input signed [ACCUM_WIDTH-1:0] sample_accum,   // Accumulating path
  output signed [OUT_WIDTH-1:0] sample_out       // Result
);

  // Two-deep register chain on the 'a' sample input
  reg signed [IN_WIDTH-1:0] a_reg[0:1];
  // Single register on the 'b' (symmetric) sample input
  reg signed [IN_WIDTH-1:0] d_reg;
  // Pre-adder result; one bit wider than the operands so the sum cannot overflow
  reg signed [IN_WIDTH:0] ad_reg;
  // Two-deep register chain on the coefficient
  reg signed [COEFF_WIDTH-1:0] b_reg[0:1];
  // Product register; (IN_WIDTH+1) + COEFF_WIDTH bits wide
  reg signed [IN_WIDTH+COEFF_WIDTH:0] m_reg;
  // Accumulator register
  reg signed [ACCUM_WIDTH-1:0] p_reg;

  always @(posedge clk) begin
    // Synchronous reset; 'clear' has exactly the same effect as 'reset'
    if (reset | clear) begin
      a_reg[0] <= 0;
      a_reg[1] <= 0;
      d_reg <= 0;
      b_reg[0] <= 0;
      b_reg[1] <= 0;
      ad_reg <= 0;
      m_reg <= 0;
      p_reg <= 0;
    end else begin
      // Sample path: the whole chain is stalled while sample_in_stb is low
      if (sample_in_stb) begin
        a_reg[0] <= sample_in_a;
        a_reg[1] <= a_reg[0];
        d_reg <= sample_in_b;
        ad_reg <= a_reg[1] + d_reg;        // pre-adder
        m_reg <= ad_reg * b_reg[1];        // multiplier
        p_reg <= sample_accum + m_reg;     // post-adder with the incoming partial sum
      end
      // Coefficient path: first stage loads on strobe only ...
      if (coeff_load_stb) begin
        b_reg[0] <= coeff_in;
      end
      // ... second stage follows the first on every clock, independent of
      // both strobes
      b_reg[1] <= b_reg[0];
    end
  end

  // The forwarded coefficient is taken after the first coefficient register,
  // the forwarded sample after the second 'a' register.
  assign coeff_forward = b_reg[0];
  assign sample_forward = a_reg[1];
  // Result is the low OUT_WIDTH bits of the accumulator register
  // NOTE(review): presumably OUT_WIDTH <= ACCUM_WIDTH is required for this
  // part-select to be in range -- confirm against the instantiating filter.
  assign sample_out = p_reg[OUT_WIDTH-1:0];

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/fosphor/axi_logpwr.v b/fpga/usrp3/lib/rfnoc/fosphor/axi_logpwr.v new file mode 100644 index 000000000..037c33563 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/axi_logpwr.v @@ -0,0 +1,102 @@ +/* + * 
axi_logpwr.v
 *
 * Copyright (C) 2014 Ettus Corporation LLC
 * Copyright 2018 Ettus Research, a National Instruments Company
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 *
 * vim: ts=4 sw=4
 */

`ifdef SIM
`default_nettype none
`endif

// AXI-Stream wrapper around f15_logpwr.
//
// Accepts 32-bit input words (split below into two 16-bit halves that feed
// the real / imaginary inputs of f15_logpwr) and emits the 16-bit result
// through a small output FIFO. tlast travels alongside the data through a
// delay line and is stored in bit 16 of each FIFO entry.
module axi_logpwr #(
    parameter [1:0] RANDOM_MODE = 2'b11
)(
    input clk, input reset,
    input [31:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
    output [15:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
);

    // -------------------------------------------------------------------
    // Input side
    // -------------------------------------------------------------------

    // Upper half-word drives the real input, lower half-word the imaginary
    wire [15:0] in_real_0 = i_tdata[31:16];
    wire [15:0] in_imag_0 = i_tdata[15:0];

    wire fifo_afull;
    reg  ready;
    reg  valid_1;

    // Registered ready: a word is accepted while the output FIFO is below
    // its almost-full mark, or while the consumer is draining it.
    always @(posedge clk)
        ready <= o_tready | ~fifo_afull;

    // A transfer happened in the previous cycle
    always @(posedge clk)
        valid_1 <= ready & i_tvalid;

    assign i_tready = ready;

    // -------------------------------------------------------------------
    // Side-band alignment
    // -------------------------------------------------------------------

    // valid: 1 register above + 11 delay stages; last: 12 delay stages.
    // Both therefore arrive 12 clocks after the input word.
    wire valid_12;
    wire last_12;

    delay_bit #(.DELAY(11)) dl_valid (.d(valid_1), .q(valid_12), .clk(clk));
    delay_bit #(.DELAY(12)) dl_last  (.d(i_tlast), .q(last_12),  .clk(clk));

    // -------------------------------------------------------------------
    // Random source and log-power core
    // -------------------------------------------------------------------

    wire [31:0] rng;
    wire [15:0] out_logpwr_12;

    rng rng_I (
        .out(rng),
        .clk(clk),
        .rst(reset)
    );

    f15_logpwr logpwr_I (
        .in_real_0(in_real_0),
        .in_imag_0(in_imag_0),
        .out_12(out_logpwr_12),
        .rng(rng),
        .random_mode(RANDOM_MODE),
        .clk(clk),
        .rst(reset)
    );

    // -------------------------------------------------------------------
    // Output FIFO and AXI-Stream mapping
    // -------------------------------------------------------------------

    wire        fifo_empty;
    wire [16:0] fifo_do;
    wire [16:0] fifo_di   = { last_12, out_logpwr_12 };   // {tlast, data}
    wire        fifo_wren = valid_12;
    wire        fifo_rden = o_tready & ~fifo_empty;

    fifo_srl #(
        .WIDTH(17),
        .LOG2_DEPTH(6),
        .AFULL_LEVEL(49)
    ) fifo_I (
        .di(fifo_di),
        .wren(fifo_wren),
        .afull(fifo_afull),
        .do(fifo_do),
        .rden(fifo_rden),
        .empty(fifo_empty),
        .clk(clk),
        .rst(reset)
    );

    assign o_tvalid = ~fifo_empty;
    assign o_tlast  = fifo_do[16];
    assign o_tdata  = fifo_do[15:0];

endmodule // axi_logpwr
diff --git a/fpga/usrp3/lib/rfnoc/fosphor/delay.v 
b/fpga/usrp3/lib/rfnoc/fosphor/delay.v new file mode 100644 index 000000000..44c043642 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/delay.v @@ -0,0 +1,140 @@
/*
 * delay.v
 *
 * Generates a delay line/bus using a combination of SRL and Register
 *
 * Copyright (C) 2014 Ettus Corporation LLC
 * Copyright 2018 Ettus Research, a National Instruments Company
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 *
 * vim: ts=4 sw=4
 */

`ifdef SIM
`default_nettype none
`endif

// ---------------------------------------------------------------------------
// Single line delay
// ---------------------------------------------------------------------------
//
// Delays 'd' by DELAY clock cycles. Structure chosen at elaboration time:
//   DELAY == 0      : q is wired straight to d
//   DELAY == 1      : a single FDRE flip-flop
//   DELAY 2 .. 17   : SRL16E (tap address DELAY-2) followed by an FDRE
//   DELAY > 17      : SRLC32E (tap address DELAY-2) followed by an FDRE
//
// NOTE(review): the tap address is only 5 bits wide, so DELAY-2 must fit in
// 0..31; larger DELAY values would silently wrap -- confirm no caller
// exceeds DELAY = 33.
// None of the storage elements is reset (FDRE .R is tied low, SRLs have no
// reset input), so the line content after power-up is whatever the
// primitives initialise to.

module delay_bit #(
    parameter integer DELAY = 1
)(
    input wire d,
    output wire q,
    input wire clk
);

    // Signals
    // SRL tap address; presumably address A selects a depth of A+1 stages
    // (see vendor primitive documentation), which with the output flop
    // gives DELAY cycles in total.
    wire [4:0] addr = DELAY - 2;
    wire ff_in;     // output of the SRL stage / input of the final flop

    // Generate SRL if needed (or bypass if not)
    generate
        if (DELAY > 17) begin
            SRLC32E srl_I (
                .Q(ff_in),
                .A(addr),
                .CE(1'b1),
                .CLK(clk),
                .D(d)
            );
        end else if (DELAY > 1) begin
            // Only the low 4 address bits are used by the 16-deep SRL
            SRL16E srl_I (
                .Q(ff_in),
                .A0(addr[0]),
                .A1(addr[1]),
                .A2(addr[2]),
                .A3(addr[3]),
                .CE(1'b1),
                .CLK(clk),
                .D(d)
            );
        end else begin
            assign ff_in = d;
        end
    endgenerate

    // Generate flip-flop if needed (or bypass if not)
    generate
        if (DELAY > 0) begin
            FDRE ff_I (
                .Q(q),
                .C(clk),
                .CE(1'b1),
                .D(ff_in),
                .R(1'b0)
            );
        end else begin
            assign q = ff_in;
        end
    endgenerate

endmodule // delay_bit


// ---------------------------------------------------------------------------
// Bus delay
// ---------------------------------------------------------------------------
//
// WIDTH-bit version of delay_bit: the same SRL + flip-flop structure is
// replicated once per bit, all bits sharing the same tap address. The same
// DELAY ranges and the same 5-bit address limit apply.

module delay_bus #(
    parameter integer DELAY = 1,
    parameter integer WIDTH = 1
)(
    input wire [WIDTH-1:0] d,
    output wire [WIDTH-1:0] q,
    input wire clk
);
    genvar i;

    // Variables / Signals
    wire [4:0] addr = DELAY - 2;    // shared SRL tap address
    wire [WIDTH-1:0] ff_in;         // per-bit SRL outputs

    // Generate SRL if needed (or bypass if not)
    generate
        if (DELAY > 17) begin
            for (i=0; i<WIDTH; i=i+1)
                SRLC32E srl_I (
                    .Q(ff_in[i]),
                    .A(addr),
                    .CE(1'b1),
                    .CLK(clk),
                    .D(d[i])
                );
        end else if (DELAY > 1) begin
            for (i=0; i<WIDTH; i=i+1)
                SRL16E srl_I (
                    .Q(ff_in[i]),
                    .A0(addr[0]),
                    .A1(addr[1]),
                    .A2(addr[2]),
                    .A3(addr[3]),
                    .CE(1'b1),
                    .CLK(clk),
                    .D(d[i])
                );
        end else begin
            assign ff_in = d;
        end
    endgenerate

    // Generate flip-flop if needed (or bypass if not)
    generate
        if (DELAY > 0) begin
            for (i=0; i<WIDTH; i=i+1)
                FDRE ff_I (
                    .Q(q[i]),
                    .C(clk),
                    .CE(1'b1),
                    .D(ff_in[i]),
                    .R(1'b0)
                );
        end else begin
            assign q = ff_in;
        end
    endgenerate

endmodule // delay_bus
diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_avg.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_avg.v new file mode 100644 index 000000000..30123ee20 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_avg.v @@ -0,0 +1,117 @@ +/* + * f15_avg.v + * + * Applies the y(t+1) = alpha * y(t) + (1 - alpha) * x(t) + * to compute an IIR average + * + * Copyright (C) 2015 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_avg #( + parameter integer Y_WIDTH = 12, + parameter integer X_WIDTH = 16 +)( + input wire [Y_WIDTH-1:0] yin_0, + input wire [X_WIDTH-1:0] x_0, + input wire [15:0] rng_0, + input wire [15:0] alpha_0, + input wire clear_0, + output wire [Y_WIDTH-1:0] yout_4, + input wire clk, + input wire rst +); + + // Signals + wire [X_WIDTH-1:0] x_2; + wire clear_3; + wire [47:0] pout_4; + + // Main DSP Instance + DSP48E1 #( + .A_INPUT("DIRECT"), + .B_INPUT("DIRECT"), + .USE_DPORT("TRUE"), + .USE_MULT("MULTIPLY"), + .AUTORESET_PATDET("NO_RESET"), + .MASK(48'h3fffffffffff), + .PATTERN(48'h000000000000), + .SEL_MASK("MASK"), + .SEL_PATTERN("PATTERN"), + .USE_PATTERN_DETECT("PATDET"), + .ACASCREG(1), + .ADREG(1), + .ALUMODEREG(1), + .AREG(1), + .BCASCREG(1), + 
.BREG(2), + .CARRYINREG(1), + .CARRYINSELREG(1), + .CREG(1), + .DREG(1), + .INMODEREG(1), + .MREG(1), + .OPMODEREG(1), + .PREG(1), + .USE_SIMD("ONE48") + ) + dsp_avg_I ( + .P(pout_4), + .ACIN(30'h0), + .BCIN(18'h0), + .CARRYCASCIN(1'h0), + .MULTSIGNIN(1'h0), + .PCIN(48'h000000000000), + .ALUMODE(4'b0000), // Z + X + Y + CIN + .CARRYINSEL(3'h0), + .CEINMODE(1'b1), + .CLK(clk), + .INMODE(5'b01100), // B=B2, A=D-A2 + .OPMODE(7'b0110101), // X=M1, Y=M2, Z=C + .RSTINMODE(rst), + .A({{(30-X_WIDTH){1'b0}}, x_0}), + .B({2'h0, alpha_0}), + .C({{(32-X_WIDTH){1'b0}}, x_2, 16'h8000}), + .CARRYIN(1'b0), + .D({{(25-X_WIDTH){1'b0}}, yin_0, rng_0[X_WIDTH-Y_WIDTH-1:0]}), + .CEA1(1'b0), + .CEA2(1'b1), + .CEAD(1'b1), + .CEALUMODE(1'b1), + .CEB1(1'b1), + .CEB2(1'b1), + .CEC(1'b1), + .CECARRYIN(1'b1), + .CECTRL(1'b1), + .CED(1'b1), + .CEM(1'b1), + .CEP(1'b1), + .RSTA(rst), + .RSTALLCARRYIN(rst), + .RSTALUMODE(rst), + .RSTB(rst), + .RSTC(rst), + .RSTCTRL(rst), + .RSTD(rst), + .RSTM(rst), + .RSTP(clear_3) + ); + + // Delay x for the C input + delay_bus #(2, X_WIDTH) dl_x (x_0, x_2, clk); + + // Delay clear to use as reset for P + delay_bit #(3) dl_clear (clear_0, clear_3, clk); + + // Map the output + assign yout_4 = pout_4[X_WIDTH+15:X_WIDTH-Y_WIDTH+16]; + +endmodule // f15_avg diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_binmap.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_binmap.v new file mode 100644 index 000000000..36935a0e9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_binmap.v @@ -0,0 +1,139 @@ +/* + * f15_binmap.v + * + * Maps a log pwr value to an histogram bin + * + * Copyright (C) 2014 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_binmap #( + parameter integer BIN_WIDTH = 6, + parameter integer SCALE_FRAC_BITS = 8 +)( + input wire [15:0] in_0, + input wire [15:0] offset_0, // unsigned + input wire 
[15:0] scale_0, // unsigned + output reg [BIN_WIDTH-1:0] bin_5, // bin number + output reg sat_ind_5, // saturation indicator + input wire clk, + input wire rst +); + localparam integer TBI = 15 + SCALE_FRAC_BITS; // Top-Bit-Index + + // Signals + wire [47:0] dsp_pout_4; + wire dsp_pat_match_4; + + + // Main DSP + // -------- + // computes (in - cfg_offset) * cfg_scale + + DSP48E1 #( + .A_INPUT("DIRECT"), + .B_INPUT("DIRECT"), + .USE_DPORT("TRUE"), + .USE_MULT("MULTIPLY"), + .AUTORESET_PATDET("NO_RESET"), + .MASK({1'b1, {(46-TBI){1'b0}}, {(TBI+1){1'b1}}}), + .PATTERN(48'h000000000000), + .SEL_MASK("MASK"), + .SEL_PATTERN("PATTERN"), + .USE_PATTERN_DETECT("PATDET"), + .ACASCREG(1), + .ADREG(1), + .ALUMODEREG(1), + .AREG(1), + .BCASCREG(2), + .BREG(2), + .CARRYINREG(1), + .CARRYINSELREG(1), + .CREG(1), + .DREG(1), + .INMODEREG(1), + .MREG(1), + .OPMODEREG(1), + .PREG(1), + .USE_SIMD("ONE48") + ) + dsp_binmap_I ( + .PATTERNDETECT(dsp_pat_match_4), + .P(dsp_pout_4), + .ACIN(30'h0), + .BCIN(18'h0), + .CARRYCASCIN(1'h0), + .MULTSIGNIN(1'h0), + .PCIN(48'h000000000000), + .ALUMODE(4'b0000), // Z + X + Y + CIN + .CARRYINSEL(3'h0), + .CEINMODE(1'b1), + .CLK(clk), + .INMODE(5'b01100), // B=B2, A=D-A2 + .OPMODE(7'b0000101), // X=M1, Y=M2, Z=0 + .RSTINMODE(rst), + .A({14'h0, offset_0}), + .B({ 2'h0, scale_0}), + .C({48'h0}), + .CARRYIN(1'b0), + .D({ 9'h0, in_0}), + .CEA1(1'b0), + .CEA2(1'b1), + .CEAD(1'b1), + .CEALUMODE(1'b1), + .CEB1(1'b1), + .CEB2(1'b1), + .CEC(1'b1), + .CECARRYIN(1'b1), + .CECTRL(1'b1), + .CED(1'b1), + .CEM(1'b1), + .CEP(1'b1), + .RSTA(rst), + .RSTALLCARRYIN(rst), + .RSTALUMODE(rst), + .RSTB(rst), + .RSTC(rst), + .RSTCTRL(rst), + .RSTD(rst), + .RSTM(rst), + .RSTP(rst) + ); + + + // Post-DSP mapping & saturation + // ----------------------------- + + always @(posedge clk) + begin + if (rst == 1) begin + bin_5 <= 0; + sat_ind_5 <= 0; + end else begin + // Undeflow + if (dsp_pout_4[47] == 1) begin + bin_5 <= {BIN_WIDTH{1'b0}}; + sat_ind_5 <= 1; + + // Overflow 
+ end else if (dsp_pat_match_4 == 0) begin + bin_5 <= {BIN_WIDTH{1'b1}}; + sat_ind_5 <= 1; + + // In-range + end else begin + bin_5 <= dsp_pout_4[TBI:TBI-BIN_WIDTH+1]; + sat_ind_5 <= 0; + end + end + end + +endmodule // f15_binmap diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_core.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_core.v new file mode 100644 index 000000000..003fd1d50 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_core.v @@ -0,0 +1,609 @@ +/* + * f15_core.v + * + * Core of the fosphor IP + * + * Copyright (C) 2014,2015 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_core ( + input clk, input reset, + input clear_req, + input [ 1:0] cfg_random, + input [15:0] cfg_offset, input [15:0] cfg_scale, + input [15:0] cfg_trise, input [15:0] cfg_tdecay, + input [15:0] cfg_alpha, input [15:0] cfg_epsilon, + input [11:0] cfg_decim, input cfg_decim_changed, + input [ 1:0] cfg_wf_div, input cfg_wf_mode, + input [ 7:0] cfg_wf_decim, input cfg_wf_decim_changed, + input [31:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [31:0] o_hist_tdata, output o_hist_tlast, output o_hist_tvalid, input o_hist_tready, output o_hist_teob, + output [31:0] o_wf_tdata, output o_wf_tlast, output o_wf_tvalid, input o_wf_tready +); + // Signals + reg [31:0] in_data; + reg in_last; + reg in_valid; + reg in_ready; + + wire [15:0] proc_real_0, proc_imag_0; + wire [15:0] proc_logpwr_12, proc_logpwr_end; + wire proc_last_0, proc_last_12, proc_last_end; + wire proc_valid_0, proc_valid_12, proc_valid_end; + + reg [5:0] proc_binscan_addr_end; + reg proc_binscan_last_end; + reg proc_clear_end; + reg clear_pending; + + wire rise_last_0, rise_last_15; + wire rise_valid_0, rise_valid_15, rise_valid_24; + wire [15:0] rise_logpwr_0; + wire [5:0] rise_pwrbin_5, rise_pwrbin_15; + reg [9:0] rise_addr_lsb_15; + wire 
[15:0] rise_addr_15, rise_addr_24; + wire [8:0] rise_intensity_18, rise_intensity_23; + reg [8:0] rise_intensity_24; + + wire decay_last_0, decay_last_9; + wire decay_valid_0, decay_valid_9; + reg [ 9:0] decay_addr_lsb_0; + wire [15:0] decay_addr_0, decay_addr_9; + wire [8:0] decay_intensity_3, decay_intensity_8; + reg [8:0] decay_intensity_9; + wire decay_clear_0, decay_clear_9; + + reg [10:0] sls_addr_0; + wire [10:0] sls_addr_6; + wire [35:0] sls_data_2, sls_data_6; + wire sls_last_0; + wire sls_valid_0, sls_valid_6; + + wire [15:0] avgmh_logpwr_0, avgmh_logpwr_2; + wire avgmh_clear_0, avgmh_clear_2; + wire [11:0] avgmh_avg_2, avgmh_avg_6, avgmh_avg_9; + wire [11:0] avgmh_max_2, avgmh_max_6, avgmh_max_9; + + wire [5:0] out_binaddr_0, out_binaddr_9; + wire out_binlast_0, out_binlast_9; + wire [33:0] out_hist_fifo_di; + wire out_hist_fifo_wren; + wire out_hist_fifo_afull; + wire [33:0] out_hist_fifo_do; + wire out_hist_fifo_rden; + wire out_hist_fifo_empty; + + wire [11:0] wf_data_2, wf_data_5, wf_data_6; + wire [15:0] wf_logpwr_0, wf_logpwr_2; + wire [ 7:0] wf_out_data_5; + wire wf_last_0, wf_last_2, wf_out_last_5; + wire wf_valid_0, wf_valid_2, wf_out_valid_5; + + reg [ 1:0] out_wf_cnt; + reg [32:0] out_wf_fifo_di; + reg out_wf_fifo_wren; + wire out_wf_fifo_afull; + wire [32:0] out_wf_fifo_do; + wire out_wf_fifo_rden; + wire out_wf_fifo_empty; + + wire [31:0] rng; + + + // ----------------------------------------------------------------------- + // Input + // ----------------------------------------------------------------------- + + always @(posedge clk) + begin + // Control + if (reset) begin + in_valid <= 1'b0; + in_ready <= 1'b0; + end else begin + // Valid flag + in_valid <= i_tvalid & i_tready; + + // We know we can get a sample if : + // - Both outputs consumed a sample + // - Both FIFOs have enough space + in_ready <= (o_hist_tready & o_wf_tready) | + (~out_hist_fifo_afull & ~out_wf_fifo_afull); + end + + // Data pipeline + in_data <= i_tdata; + in_last 
<= i_tlast; + end + + assign i_tready = in_ready; + + + // ----------------------------------------------------------------------- + // Processing chain + // ----------------------------------------------------------------------- + + // Input to this stage + assign proc_real_0 = in_data[31:16]; + assign proc_imag_0 = in_data[15:0]; + assign proc_last_0 = in_last; + assign proc_valid_0 = in_valid; + + // Log power + f15_logpwr logpwr_I ( + .in_real_0(proc_real_0), + .in_imag_0(proc_imag_0), + .out_12(proc_logpwr_12), + .rng(rng), + .random_mode(cfg_random), + .clk(clk), + .rst(reset) + ); + + // Aggregation + // Not supported ATM but this is where it would be + + // Flag propagation + delay_bit #(12) dl_proc_last (proc_last_0, proc_last_12, clk); + delay_bit #(12) dl_proc_valid (proc_valid_0, proc_valid_12, clk); + + // Even/Odd resequencing + f15_eoseq #( + .WIDTH(16) + ) eoseq_I ( + .in_data(proc_logpwr_12), + .in_valid(proc_valid_12), + .in_last(proc_last_12), + .out_data(proc_logpwr_end), + .out_valid(proc_valid_end), + .out_last(proc_last_end), + .clk(clk), + .rst(reset) + ); + + // Bin address counter and clear process + // We do this here so we can propagate to every other stage with + // just delay lines + always @(posedge clk) + begin + if (reset) begin + proc_binscan_addr_end <= 6'd0; + proc_binscan_last_end <= 1'b0; + end else if (proc_valid_end & proc_last_end) begin + proc_binscan_addr_end <= proc_binscan_addr_end + 1; + proc_binscan_last_end <= (proc_binscan_addr_end == 6'h3e); + end + end + + always @(posedge clk) + begin + if (reset) begin + clear_pending <= 1'b0; + proc_clear_end <= 1'b0; + end else begin + if (proc_valid_end & proc_last_end & proc_binscan_last_end) begin + clear_pending <= 1'b0; + proc_clear_end <= clear_pending; + end else begin + clear_pending <= clear_pending | clear_req; + end + end + end + + + // ----------------------------------------------------------------------- + // Rise + // 
----------------------------------------------------------------------- + + // Input of this stage + assign rise_last_0 = proc_last_end; + assign rise_valid_0 = proc_valid_end; + assign rise_logpwr_0 = proc_logpwr_end; + + // Power Bin mapping + f15_binmap #( + .BIN_WIDTH(6), + .SCALE_FRAC_BITS(8) + ) binmap_I ( + .in_0(rise_logpwr_0), + .offset_0(cfg_offset), + .scale_0(cfg_scale), + .bin_5(rise_pwrbin_5), + .sat_ind_5(), // FIXME: Could be use to disable write ena (configurable) + .clk(clk), + .rst(reset) + ); + + // Delay + // (We need to make sure rise doesn't conflict with decay) + delay_bus #(10, 6) dl_pwrbin (rise_pwrbin_5, rise_pwrbin_15, clk); + delay_bit #(15) dl_valid (rise_valid_0, rise_valid_15, clk); + delay_bit #(15) dl_last (rise_last_0, rise_last_15, clk); + + // Address + always @(posedge clk) + begin + if (reset) + rise_addr_lsb_15[9:0] <= 9'd0; + else if (rise_valid_15) + if (rise_last_15) + rise_addr_lsb_15 <= 9'd0; + else + rise_addr_lsb_15 <= rise_addr_lsb_15[9:0] + 1; + end + + assign rise_addr_15 = { rise_pwrbin_15, rise_addr_lsb_15 }; + + // Exponential rise + f15_rise_decay #( + .WIDTH(9) + ) rise_I ( + .in_0(rise_intensity_18), + .out_5(rise_intensity_23), + .k_0(cfg_trise), + .ena_0(1'b1), + .mode_0(1'b0), + .rng(rng[15:0]), + .clk(clk), + .rst(reset) + ); + + // Need one more stage just for proper even/odd interlacing + always @(posedge clk) + rise_intensity_24 <= rise_intensity_23; + + // Propagate control + delay_bit #(9) dl_rise_valid2 (rise_valid_15, rise_valid_24, clk); + delay_bus #(9, 16) dl_rise_addr2 (rise_addr_15, rise_addr_24, clk); + + + // ----------------------------------------------------------------------- + // State storage + // ----------------------------------------------------------------------- + + f15_histo_mem #( + .ADDR_WIDTH(16) + ) mem_I ( + // Rise readout + .addr_AR(rise_addr_15), + .data_AR(rise_intensity_18), + .ena_AR(rise_valid_15), + + // Rise writeback + .addr_AW(rise_addr_24), + 
.data_AW(rise_intensity_24), + .ena_AW(rise_valid_24), + + // Decay readout + .addr_BR(decay_addr_0), + .data_BR(decay_intensity_3), + .ena_BR(decay_valid_0), + + // Decay writeback + .addr_BW(decay_addr_9), + .data_BW(decay_intensity_9), + .ena_BW(decay_valid_9), + + // Common + .clk(clk), + .rst(reset) + ); + + + // ----------------------------------------------------------------------- + // Decay & Clear + // ----------------------------------------------------------------------- + + // Input of this stage + assign decay_last_0 = proc_last_end; + assign decay_valid_0 = proc_valid_end; + assign decay_clear_0 = proc_clear_end; + + // Address generation + always @(posedge clk) + begin + if (reset) + decay_addr_lsb_0 <= 10'd0; + else if (decay_valid_0) + if (decay_last_0) + decay_addr_lsb_0 <= 10'd0; + else + decay_addr_lsb_0 <= decay_addr_lsb_0 + 1; + end + + assign decay_addr_0 = { proc_binscan_addr_end, decay_addr_lsb_0 }; + + // Exponential decay + f15_rise_decay #( + .WIDTH(9) + ) decay_I ( + .in_0(decay_intensity_3), + .out_5(decay_intensity_8), + .k_0(cfg_tdecay), + .ena_0(1'b1), + .mode_0(1'b1), + .rng(rng[15:0]), + .clk(clk), + .rst(reset) + ); + + // Need one more stage just for proper even/odd interlacing + // Also do the clear in there + always @(posedge clk) + if (decay_clear_9) + decay_intensity_9 <= 9'd0; + else + decay_intensity_9 <= decay_intensity_8; + + // Propagate control + delay_bit #(9) dl_decay_valid (decay_valid_0, decay_valid_9, clk); + delay_bit #(9) dl_decay_last (decay_last_0, decay_last_9, clk); + delay_bit #(9) dl_decay_clear (decay_clear_0, decay_clear_9, clk); + delay_bus #(9, 16) dl_decay_addr (decay_addr_0, decay_addr_9, clk); + + + // ----------------------------------------------------------------------- + // Shared line-storage + // ----------------------------------------------------------------------- + // This is shared between the average/max-hold spectrum lines and the + // waterfall aggregation + + // Input of this stage + 
assign sls_last_0 = proc_last_end; + assign sls_valid_0 = proc_valid_end; + + // Address + always @(posedge clk) + begin + if (reset) + sls_addr_0 <= 11'd0; + else if (sls_valid_0) + if (sls_last_0) + sls_addr_0 <= 11'd0; + else + sls_addr_0 <= sls_addr_0 + 1; + end + + delay_bus #(6, 11) dl_sls_addr (sls_addr_0, sls_addr_6, clk); + delay_bit #(6) dl_sls_valid (sls_valid_0, sls_valid_6, clk); + + // Storage + f15_line_mem #( + .AWIDTH(11), + .DWIDTH(36) + ) line_mem_I ( + .rd_addr(sls_addr_0), + .rd_data(sls_data_2), + .rd_ena(sls_valid_0), + .wr_addr(sls_addr_6), + .wr_data(sls_data_6), + .wr_ena(sls_valid_6), + .clk(clk), + .rst(reset) + ); + + // Data mapping + assign avgmh_avg_2 = sls_data_2[11: 0]; + assign avgmh_max_2 = sls_data_2[23:12]; + assign wf_data_2 = sls_data_2[35:24]; + + assign sls_data_6[11: 0] = avgmh_avg_6; + assign sls_data_6[23:12] = avgmh_max_6; + assign sls_data_6[35:24] = wf_data_6; + + + // ----------------------------------------------------------------------- + // Average and Max-Hold + // ----------------------------------------------------------------------- + + // Input of this stage + assign avgmh_logpwr_0 = proc_logpwr_end; + assign avgmh_clear_0 = proc_clear_end; + + // Modify stage: Average + f15_avg #( + .Y_WIDTH(12), + .X_WIDTH(16) + ) avg_I ( + .yin_0(avgmh_avg_2), + .x_0(avgmh_logpwr_2), + .rng_0(rng[15:0]), + .alpha_0(cfg_alpha), + .clear_0(avgmh_clear_2), + .yout_4(avgmh_avg_6), + .clk(clk), + .rst(reset) + ); + + // Modify stage: Max Hold + f15_maxhold #( + .Y_WIDTH(12), + .X_WIDTH(16), + .FRAC_WIDTH(8) + ) maxhold_I ( + .yin_0(avgmh_max_2), + .x_0(avgmh_logpwr_2), + .rng_0(rng[15:0]), + .epsilon_0(cfg_epsilon), + .clear_0(avgmh_clear_2), + .yout_4(avgmh_max_6), + .clk(clk), + .rst(reset) + ); + + // Delays + delay_bus #(2, 16) dl_avgmh_logpwr (avgmh_logpwr_0, avgmh_logpwr_2, clk); + delay_bit #(2) dl_avgmh_clear (avgmh_clear_0, avgmh_clear_2, clk); + delay_bus #(3, 12) dl_avgmh_max (avgmh_max_6, avgmh_max_9, clk); + 
delay_bus #(3, 12) dl_avgmh_avg (avgmh_avg_6, avgmh_avg_9, clk); + + + // ----------------------------------------------------------------------- + // Histogram Output + // ----------------------------------------------------------------------- + + // For the 'tap' to work, we need avmh and decay blocks to have the + // same number of pipeline stage and be right after proc. + + // Input of this stage + assign out_binaddr_0 = proc_binscan_addr_end; + assign out_binlast_0 = proc_binscan_last_end; + + // Delays + delay_bus #(9, 6) dl_out_binaddr (out_binaddr_0, out_binaddr_9, clk); + delay_bit #(9) dl_out_binlast (out_binlast_0, out_binlast_9, clk); + + // Packetizer + f15_packetizer #( + .BIN_WIDTH(6), + .DECIM_WIDTH(12) + ) packetizer_I ( + .in_bin_addr(out_binaddr_9), + .in_bin_last(out_binlast_9), + .in_histo(decay_intensity_9[8:1]), + .in_spectra_max(avgmh_max_9[11:4]), + .in_spectra_avg(avgmh_avg_9[11:4]), + .in_last(decay_last_9), + .in_valid(decay_valid_9), + .out_data(out_hist_fifo_di[31:0]), + .out_last(out_hist_fifo_di[32]), + .out_eob(out_hist_fifo_di[33]), + .out_valid(out_hist_fifo_wren), + .cfg_decim(cfg_decim), + .cfg_decim_changed(cfg_decim_changed), + .clk(clk), + .rst(reset) + ); + + // FIFO + fifo_srl #( + .WIDTH(34), + .LOG2_DEPTH(6), + .AFULL_LEVEL(20) + ) out_hist_fifo_I ( + .di(out_hist_fifo_di), + .wren(out_hist_fifo_wren), + .afull(out_hist_fifo_afull), + .do(out_hist_fifo_do), + .rden(out_hist_fifo_rden), + .empty(out_hist_fifo_empty), + .clk(clk), + .rst(reset) + ); + + // AXI mapping + assign o_hist_tdata = out_hist_fifo_do[31:0]; + assign o_hist_tlast = out_hist_fifo_do[32]; + assign o_hist_teob = out_hist_fifo_do[33]; + assign o_hist_tvalid = ~out_hist_fifo_empty; + assign out_hist_fifo_rden = ~out_hist_fifo_empty && o_hist_tready; + + + // ----------------------------------------------------------------------- + // Waterfall Output + // ----------------------------------------------------------------------- + + // Input to this stage 
(synced to SLS) + assign wf_logpwr_0 = proc_logpwr_end; + assign wf_last_0 = proc_last_end; + assign wf_valid_0 = proc_valid_end; + + // Delay some input signals + delay_bus #(2, 16) dl_wf_logpwr (wf_logpwr_0, wf_logpwr_2, clk); + delay_bit #(2) dl_wf_last (wf_last_0, wf_last_2, clk); + delay_bit #(2) dl_wf_valid (wf_valid_0, wf_valid_2, clk); + + // Decimation / Aggregation + f15_wf_agg #( + .Y_WIDTH(12), + .X_WIDTH(16), + .DECIM_WIDTH(8) + ) dut_wf ( + .yin_0(wf_data_2), + .x_0(wf_logpwr_2), + .valid_0(wf_valid_2), + .last_0(wf_last_2), + .rng_0(rng[15:0]), + .yout_3(wf_data_5), + .zout_3(wf_out_data_5), + .zvalid_3(wf_out_valid_5), + .cfg_div(cfg_wf_div), + .cfg_mode(cfg_wf_mode), + .cfg_decim(cfg_wf_decim), + .cfg_decim_changed(cfg_wf_decim_changed), + .clk(clk), + .rst(reset) + ); + + // Delay some output signals + delay_bus #(1, 12) dl_wf_data (wf_data_5, wf_data_6, clk); + delay_bit #(3) dl_wf_out_last (wf_last_2, wf_out_last_5, clk); + + // Pack into 32 bits words + always @(posedge clk) + begin + if (reset) begin + out_wf_fifo_di <= 0; + out_wf_fifo_wren <= 1'b0; + out_wf_cnt <= 2'b00; + end else begin + if (wf_out_valid_5) begin + if (wf_out_last_5) begin + out_wf_fifo_di <= { 1'b1, out_wf_fifo_di[23:0], wf_out_data_5 }; + out_wf_fifo_wren <= 1'b1; + out_wf_cnt <= 2'b00; + end else begin + out_wf_fifo_di <= { 1'b0, out_wf_fifo_di[23:0], wf_out_data_5 }; + out_wf_fifo_wren <= (out_wf_cnt == 2'b11); + out_wf_cnt <= out_wf_cnt + 1; + end + end else begin + out_wf_fifo_wren <= 1'b0; + end + end + end + + // FIFO + fifo_srl #( + .WIDTH(33), + .LOG2_DEPTH(6), + .AFULL_LEVEL(20) + ) out_wf_fifo_I ( + .di(out_wf_fifo_di), + .wren(out_wf_fifo_wren), + .afull(out_wf_fifo_afull), + .do(out_wf_fifo_do), + .rden(out_wf_fifo_rden), + .empty(out_wf_fifo_empty), + .clk(clk), + .rst(reset) + ); + + // AXI mapping + assign o_wf_tdata = out_wf_fifo_do[31:0]; + assign o_wf_tlast = out_wf_fifo_do[32]; + assign o_wf_tvalid = ~out_wf_fifo_empty; + assign out_wf_fifo_rden = 
~out_wf_fifo_empty && o_wf_tready; + + + // ----------------------------------------------------------------------- + // Misc + // ----------------------------------------------------------------------- + + // RNG +`ifdef SIM + assign rng = 0; +`else + rng rng_I (rng, clk, reset); +`endif + +endmodule // f15_core diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_eoseq.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_eoseq.v new file mode 100644 index 000000000..6d209cd16 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_eoseq.v @@ -0,0 +1,78 @@
/*
 * f15_eoseq.v
 *
 * Resequence a data flow with data/valid/last ensuring EVEN/ODD
 * sequencing (even data on even cycles, odd data on odd cycles)
 *
 * Copyright (C) 2014 Ettus Corporation LLC
 * Copyright 2018 Ettus Research, a National Instruments Company
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 *
 * vim: ts=4 sw=4
 */

`ifdef SIM
`default_nettype none
`endif

// The stream is registered once on the way through. A second, parallel copy
// (buf_*) lags the input by one clock; 'sel' chooses whether the output
// register is loaded from the input directly (latency 1) or from the lagging
// copy (latency 2). Toggling 'sel' therefore shifts the stream by one cycle.
module f15_eoseq #(
    parameter integer WIDTH = 16
)(
    input wire [WIDTH-1:0] in_data,
    input wire in_valid,
    input wire in_last,
    output reg [WIDTH-1:0] out_data,
    output reg out_valid,
    output reg out_last,
    input wire clk,
    input wire rst
);

    // One-clock-delayed copy of the input stream
    reg [WIDTH-1:0] buf_data;
    reg buf_valid;
    reg buf_last;

    // Parity of the number of valid words seen in the current packet and
    // current output-source selection
    reg odd;
    reg sel;

    // The selection flips on every idle input cycle, and on a 'last' word
    // that arrives while the parity tracker is even.
    wire flip = (in_last & ~odd) | ~in_valid;

    // Control state
    always @(posedge clk)
    begin
        if (rst) begin
            odd <= 1'b0;
            sel <= 1'b0;
        end else begin
            // Toggle on every valid word; forced back to even after 'last'
            odd <= (odd ^ in_valid) & ~(in_valid & in_last);

            if (flip)
                sel <= ~sel;
        end
    end

    // Delayed copy and output register (neither is reset)
    always @(posedge clk)
    begin
        { buf_last, buf_valid, buf_data } <= { in_last, in_valid, in_data };

        if (sel)
            { out_last, out_valid, out_data } <= { buf_last, buf_valid, buf_data };
        else
            { out_last, out_valid, out_data } <= { in_last, in_valid, in_data };
    end

endmodule // f15_eoseq
diff --git 
a/fpga/usrp3/lib/rfnoc/fosphor/f15_histo_mem.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_histo_mem.v new file mode 100644 index 000000000..7a7f7e279 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_histo_mem.v @@ -0,0 +1,287 @@
/*
 * f15_histo_mem.v
 *
 * Histogram State storage. Basically a memory with 2 R/W ports where
 * each port can do read & write at different addresses at the same time
 * if those addresses are interleaved (like read at odd address when writing
 * to even address).
 *
 * This allows two independent processes to do READ/MODIFY/WRITE.
 *
 * Copyright (C) 2014 Ettus Corporation LLC
 * Copyright 2018 Ettus Research, a National Instruments Company
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 *
 * vim: ts=4 sw=4
 */

`ifdef SIM
`default_nettype none
`endif

// Top level: splits the address space into an even and an odd bank (address
// bit 0). On each logical port (A, B) the read goes to one bank and the write
// to the other; which is which is decided per cycle by sel_A / sel_B.
// A simultaneous read and write of the same parity on one port cannot be
// served and is flagged on conflict_A / conflict_B (registered, 1 cycle late).
// Read data is returned as the OR of both banks, relying on the bank that is
// not being read driving zero.
module f15_histo_mem #(
    parameter integer ADDR_WIDTH = 16
)(
    // Port A Read
    input wire [ADDR_WIDTH-1:0] addr_AR,
    output reg [8:0] data_AR,
    input wire ena_AR,

    // Port A Write
    input wire [ADDR_WIDTH-1:0] addr_AW,
    input wire [8:0] data_AW,
    input wire ena_AW,

    // Port B Read
    input wire [ADDR_WIDTH-1:0] addr_BR,
    output reg [8:0] data_BR,
    input wire ena_BR,

    // Port B Write
    input wire [ADDR_WIDTH-1:0] addr_BW,
    input wire [8:0] data_BW,
    input wire ena_BW,

    // Error detection
    output reg conflict_A,
    output reg conflict_B,

    // Common
    input wire clk,
    input wire rst
);

    // Signals
        // Memory banks IF
    wire [ADDR_WIDTH-2:0] even_addra, odd_addra;
    wire [ADDR_WIDTH-2:0] even_addrb, odd_addrb;
    wire [8:0] even_dia, odd_dia;
    wire [8:0] even_dib, odd_dib;
    wire [8:0] even_doa, odd_doa;
    wire [8:0] even_dob, odd_dob;
    wire even_wea, odd_wea;
    wire even_web, odd_web;
    wire even_rea, odd_rea;
    wire even_reb, odd_reb;

        // Control
    wire sel_A, sel_B;


    // Mux selection
    // sel_x == 1 : read uses the odd bank, write uses the even bank
    // sel_x == 0 : read uses the even bank, write uses the odd bank
    // The read address decides when a read is active, otherwise the write
    // address does.
    assign sel_A = ena_AR ? addr_AR[0] : ~addr_AW[0];
    assign sel_B = ena_BR ? addr_BR[0] : ~addr_BW[0];

    // Conflict detection
    // (read and write enabled in the same cycle with equal address parity)
    always @(posedge clk)
    begin
        conflict_A <= !(addr_AR[0] ^ addr_AW[0]) & ena_AR & ena_AW;
        conflict_B <= !(addr_BR[0] ^ addr_BW[0]) & ena_BR & ena_BW;
    end

    // Control signals
    assign even_wea =  sel_A & ena_AW;
    assign odd_wea  = !sel_A & ena_AW;
    assign even_web =  sel_B & ena_BW;
    assign odd_web  = !sel_B & ena_BW;
    assign even_rea = !sel_A & ena_AR;
    assign odd_rea  =  sel_A & ena_AR;
    assign even_reb = !sel_B & ena_BR;
    assign odd_reb  =  sel_B & ena_BR;

    // Address path mapping
    // (bit 0 selected the bank, the remaining bits address inside the bank)
    assign even_addra = sel_A ? addr_AW[ADDR_WIDTH-1:1] : addr_AR[ADDR_WIDTH-1:1];
    assign even_addrb = sel_B ? addr_BW[ADDR_WIDTH-1:1] : addr_BR[ADDR_WIDTH-1:1];
    assign odd_addra  = sel_A ? addr_AR[ADDR_WIDTH-1:1] : addr_AW[ADDR_WIDTH-1:1];
    assign odd_addrb  = sel_B ? addr_BR[ADDR_WIDTH-1:1] : addr_BW[ADDR_WIDTH-1:1];

    // Data path mapping
    assign even_dia = data_AW;
    assign odd_dia  = data_AW;
    assign even_dib = data_BW;
    assign odd_dib  = data_BW;

    // Read data: the bank not being read outputs zero, so an OR merges them
    always @(posedge clk)
    begin
        data_AR <= even_doa | odd_doa;
        data_BR <= even_dob | odd_dob;
    end

    // Instantiate memory banks
    f15_histo_mem_bank #(
        .ADDR_WIDTH(ADDR_WIDTH-1)
    ) mem_even (
        .addra(even_addra),
        .addrb(even_addrb),
        .dia(even_dia),
        .dib(even_dib),
        .doa(even_doa),
        .dob(even_dob),
        .wea(even_wea),
        .web(even_web),
        .rea(even_rea),
        .reb(even_reb),
        .clk(clk),
        .rst(rst)
    );

    f15_histo_mem_bank #(
        .ADDR_WIDTH(ADDR_WIDTH-1)
    ) mem_odd (
        .addra(odd_addra),
        .addrb(odd_addrb),
        .dia(odd_dia),
        .dib(odd_dib),
        .doa(odd_doa),
        .dob(odd_dob),
        .wea(odd_wea),
        .web(odd_web),
        .rea(odd_rea),
        .reb(odd_reb),
        .clk(clk),
        .rst(rst)
    );

endmodule // f15_histo_mem


// One bank: an array of N_BRAMS block RAMs, each covering 12 address bits
// with 9-bit words. The address bits above bit 11 are decoded one-hot to
// pick the block. All blocks see every access; only the selected one gets its
// write enable, and every non-selected (or non-read) block has its output
// register held in reset so that OR-ing all outputs yields the read data.
//
// NOTE(review): ADDR_WIDTH is presumably required to be > 12 (the MSB
// part-select addra[ADDR_WIDTH-1:12] is otherwise out of range) -- confirm.
module f15_histo_mem_bank #(
    parameter integer ADDR_WIDTH = 15
)(
    input wire [ADDR_WIDTH-1:0] addra,
    input wire [ADDR_WIDTH-1:0] addrb,
    input wire [8:0] dia,
    input wire [8:0] dib,
    output reg [8:0] doa,
    output reg [8:0] dob,
    input wire wea,
    input wire web,
    input wire rea,
    input wire reb,
    input wire clk,
    input wire rst
);
    localparam integer N_BRAMS = 1 << (ADDR_WIDTH - 12);
    genvar i;
    integer j;

    // Signals
        // Direct RAM connections
    wire [15:0] ramb_addra;
    wire [15:0] ramb_addrb;
    wire [31:0] ramb_dia;
    wire [31:0] ramb_dib;
    wire [ 3:0] ramb_dipa;
    wire [ 3:0] ramb_dipb;
    wire [31:0] ramb_doa[0:N_BRAMS-1];
    wire [31:0] ramb_dob[0:N_BRAMS-1];
    wire [ 3:0] ramb_dopa[0:N_BRAMS-1];
    wire [ 3:0] ramb_dopb[0:N_BRAMS-1];
    wire ramb_wea[0:N_BRAMS-1];
    wire ramb_web[0:N_BRAMS-1];
    reg ramb_rstdoa[0:N_BRAMS-1];
    reg ramb_rstdob[0:N_BRAMS-1];

        // Control
    reg onehota[0:N_BRAMS-1];
    reg onehotb[0:N_BRAMS-1];

    // Map address LSB and data inputs
    // The 12 in-block address bits are placed at bits [14:3] of the RAM
    // address bus.
    assign ramb_addra = { 1'b0, addra[11:0], 3'b000 };
    assign ramb_addrb = { 1'b0, addrb[11:0], 3'b000 };

    // Upper 8 data bits go to the data bus, bit 0 goes to the parity bus.
    // The padding is written out at full width so that no implicit
    // zero-extension is needed.
    assign ramb_dia  = { 24'h000000, dia[8:1] };
    assign ramb_dib  = { 24'h000000, dib[8:1] };
    assign ramb_dipa = { 3'b000, dia[0] };
    assign ramb_dipb = { 3'b000, dib[0] };

    // OR all the RAMB outputs
    // (valid because every block except the one being read outputs zero)
    always @*
    begin
        doa = 9'h0;
        dob = 9'h0;
        for (j=0; j<N_BRAMS; j=j+1) begin
            doa = doa | { ramb_doa[j][7:0], ramb_dopa[j][0] };
            dob = dob | { ramb_dob[j][7:0], ramb_dopb[j][0] };
        end
    end

    // Generate array
    generate
        for (i=0; i<N_BRAMS; i=i+1) begin

            // Decode address MSB to one-hot signal
            // Purely combinational: use a complete sensitivity list and
            // blocking assignments.
            always @*
            begin
                onehota[i] = (addra[ADDR_WIDTH-1:12] == i) ? 1'b1 : 1'b0;
                onehotb[i] = (addrb[ADDR_WIDTH-1:12] == i) ? 1'b1 : 1'b0;
            end

            // If no read, then reset the output reg to zero
            // (registered once so it lines up with the RAM's output register)
            always @(posedge clk)
            begin
                ramb_rstdoa[i] <= !(onehota[i] & rea);
                ramb_rstdob[i] <= !(onehotb[i] & reb);
            end

            // Mask the write enable with decoded address
            assign ramb_wea[i] = onehota[i] & wea;
            assign ramb_web[i] = onehotb[i] & web;

            // Instantiate RAM Block
            RAMB36E1 #(
                .RDADDR_COLLISION_HWCONFIG("PERFORMANCE"),
                .SIM_COLLISION_CHECK("NONE"),
                .DOA_REG(1),
                .DOB_REG(1),
                .EN_ECC_READ("FALSE"),
                .EN_ECC_WRITE("FALSE"),
                .RAM_EXTENSION_A("NONE"),
                .RAM_EXTENSION_B("NONE"),
                .RAM_MODE("TDP"),
                .READ_WIDTH_A(9),
                .READ_WIDTH_B(9),
                .WRITE_WIDTH_A(9),
                .WRITE_WIDTH_B(9),
                .RSTREG_PRIORITY_A("RSTREG"),
                .RSTREG_PRIORITY_B("RSTREG"),
                .SIM_DEVICE("7SERIES"),
                .SRVAL_A(36'h000000000),
                .SRVAL_B(36'h000000000),
                .WRITE_MODE_A("READ_FIRST"),
                .WRITE_MODE_B("READ_FIRST")
            )
            mem_elem_I (
                .DOADO(ramb_doa[i]),
                .DOPADOP(ramb_dopa[i]),
                .DOBDO(ramb_dob[i]),
                .DOPBDOP(ramb_dopb[i]),
                .CASCADEINA(1'b0),
                .CASCADEINB(1'b0),
                .INJECTDBITERR(1'b0),
                .INJECTSBITERR(1'b0),
                .ADDRARDADDR(ramb_addra),
                .CLKARDCLK(clk),
                .ENARDEN(1'b1),
                .REGCEAREGCE(1'b1),
                .RSTRAMARSTRAM(rst),
                .RSTREGARSTREG(ramb_rstdoa[i]),
                .WEA({3'b0, ramb_wea[i]}),
                .DIADI(ramb_dia),
                .DIPADIP(ramb_dipa),
                .ADDRBWRADDR(ramb_addrb),
                .CLKBWRCLK(clk),
                .ENBWREN(1'b1),
                .REGCEB(1'b1),
                .RSTRAMB(rst),
                .RSTREGB(ramb_rstdob[i]),
                .WEBWE({7'b0, ramb_web[i]}),
                .DIBDI(ramb_dib),
                .DIPBDIP(ramb_dipb)
            );

        end
    endgenerate

endmodule // f15_histo_mem_bank
diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_line_mem.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_line_mem.v new file mode 100644 index 000000000..7a98d5c6e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_line_mem.v @@ -0,0 +1,67 @@ +/* + * f15_line_mem.v + * + * Memory for a single line to compute max-hold / average + * Read latency is 2 and if read is not enabled, output data is forced + * to zero. 
+ * + * Copyright (C) 2015 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_line_mem #( + parameter integer AWIDTH = 12, + parameter integer DWIDTH = 18 +)( + input wire [AWIDTH-1:0] rd_addr, + output reg [DWIDTH-1:0] rd_data, + input wire rd_ena, + + input wire [AWIDTH-1:0] wr_addr, + input wire [DWIDTH-1:0] wr_data, + input wire wr_ena, + + input wire clk, + input wire rst +); + + // Signals + reg [DWIDTH-1:0] ram [(1<<AWIDTH)-1:0]; + reg [DWIDTH-1:0] rd_data_r; + reg rd_ena_r; + +`ifdef SIM + integer i; + initial + for (i=0; i<(1<<AWIDTH); i=i+1) + ram[i] = 0; +`endif + + always @(posedge clk) + begin + // Read + rd_data_r <= ram[rd_addr]; + + // Write + if (wr_ena) + ram[wr_addr] <= wr_data; + + // Register the enable flag + rd_ena_r <= rd_ena; + + // Final read register + if (rd_ena_r) + rd_data <= rd_data_r; + else + rd_data <= 0; + end + +endmodule // f15_line_mem diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_logpwr.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_logpwr.v new file mode 100644 index 000000000..cd10bc56a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_logpwr.v @@ -0,0 +1,504 @@ +/* + * f15_logpwr.v + * + * Log Power computation + * Take a complex 16 bits input and outputs a 16 bits estimate + * of 2048 * log2(i^2+q^2). 
+ * + * Fully-pipelined, 12 levels + * + * Copyright (C) 2014 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_logpwr( + input wire [15:0] in_real_0, + input wire [15:0] in_imag_0, + output wire [15:0] out_12, + input wire [31:0] rng, + input wire [ 1:0] random_mode, /* [0] = lsb random ena, [1] = random add */ + input wire clk, + input wire rst +); + + // Signals + // Randomness control + reg [7:0] rng_lsb; + wire [6:0] opmode; + + // Power squared + wire [47:0] dsp_pchain_3; + wire [47:0] dsp_pout_4; + + wire [4:0] msb_check; + wire [31:0] pwr_4; + reg [31:0] pwr_5, pwr_6, pwr_7, pwr_8, pwr_9; + reg [4:0] log2_5, log2_6, log2_7, log2_8, log2_9; + + // LUT + wire [15:0] lut_addr_9; + wire [31:0] lut_do_11; + + wire msb_9, msb_11; + wire [4:0] lsbs_9, lsbs_11; + wire [4:0] log2_11; + + // Final value + reg [20:0] final_12; + + + // ------------- + // Power squared + // ------------- + // Output is (in_real * in_real) + (in_imag * in_imag) + // with possibly some random lsb filled in for in_{real,imag} and some + // noise added to the result. + + // Randomness control + always @(posedge clk) + if (random_mode[0]) + rng_lsb <= rng[31:24]; + else + rng_lsb <= 8'h00; + + assign opmode = random_mode[1] ? 
7'b0110101 : 7'b0000101; + + // Square of in_real + noise + DSP48E1 #( + .A_INPUT("DIRECT"), + .B_INPUT("DIRECT"), + .USE_DPORT("FALSE"), + .USE_MULT("MULTIPLY"), + .AUTORESET_PATDET("NO_RESET"), + .MASK(48'h3fffffffffff), + .PATTERN(48'h000000000000), + .SEL_MASK("MASK"), + .SEL_PATTERN("PATTERN"), + .USE_PATTERN_DETECT("NO_PATDET"), + .ACASCREG(1), + .ADREG(0), + .ALUMODEREG(1), + .AREG(1), + .BCASCREG(1), + .BREG(1), + .CARRYINREG(1), + .CARRYINSELREG(1), + .CREG(1), + .DREG(0), + .INMODEREG(1), + .MREG(1), + .OPMODEREG(1), + .PREG(1), + .USE_SIMD("ONE48") + ) + dsp_real_sq_I ( + .PCOUT(dsp_pchain_3), + .ACIN(30'h0000), + .BCIN(18'h000), + .CARRYCASCIN(1'h0), + .MULTSIGNIN(1'h0), + .PCIN(48'h000000000000), + .ALUMODE(4'b0000), // Z + X + Y + CIN + .CARRYINSEL(3'h0), + .CEINMODE(1'b1), + .CLK(clk), + .INMODE(5'b00000), // B=B2, A=A2 + .OPMODE(opmode), // X=M1, Y=M2, Z=(random_mode[1] ? C : 0) + .RSTINMODE(rst), + .A({{12{in_real_0[15]}}, in_real_0, rng_lsb[7:6]}), + .B({ in_real_0, rng_lsb[5:4]}), + .C({{41{1'b0}},rng[6:0]}), + .CARRYIN(1'b0), + .D(25'h0000), + .CEA1(1'b0), + .CEA2(1'b1), + .CEAD(1'b0), + .CEALUMODE(1'b1), + .CEB1(1'b0), + .CEB2(1'b1), + .CEC(1'b1), + .CECARRYIN(1'b1), + .CECTRL(1'b1), + .CED(1'b0), + .CEM(1'b1), + .CEP(1'b1), + .RSTA(rst), + .RSTALLCARRYIN(rst), + .RSTALUMODE(rst), + .RSTB(rst), + .RSTC(rst), + .RSTCTRL(rst), + .RSTD(rst), + .RSTM(rst), + .RSTP(rst) + ); + + // Square of in_imag and final sum + DSP48E1 #( + .A_INPUT("DIRECT"), + .B_INPUT("DIRECT"), + .USE_DPORT("FALSE"), + .USE_MULT("MULTIPLY"), + .AUTORESET_PATDET("NO_RESET"), + .MASK(48'h3fffffffffff), + .PATTERN(48'h000000000000), + .SEL_MASK("MASK"), + .SEL_PATTERN("PATTERN"), + .USE_PATTERN_DETECT("NO_PATDET"), + .ACASCREG(1), + .ADREG(0), + .ALUMODEREG(1), + .AREG(2), + .BCASCREG(1), + .BREG(2), + .CARRYINREG(1), + .CARRYINSELREG(1), + .CREG(1), + .DREG(0), + .INMODEREG(1), + .MREG(1), + .OPMODEREG(1), + .PREG(1), + .USE_SIMD("ONE48") + ) + dsp_imag_sq_I ( + 
.P(dsp_pout_4), + .ACIN(30'h0000), + .BCIN(18'h000), + .CARRYCASCIN(1'h0), + .MULTSIGNIN(1'h0), + .PCIN(dsp_pchain_3), + .ALUMODE(4'b0000), // Z + X + Y + CIN + .CARRYINSEL(3'h0), + .CEINMODE(1'b1), + .CLK(clk), + .INMODE(5'b00000), // B=B2, A=A2 + .OPMODE(7'b0010101), // X=M1, Y=M2, Z=PCIN + .RSTINMODE(rst), + .A({{12{in_imag_0[15]}}, in_imag_0, rng_lsb[3:2]}), + .B({ in_imag_0, rng_lsb[1:0]}), + .C(48'h0000), + .CARRYIN(1'b0), + .D(25'h0000), + .CEA1(1'b1), + .CEA2(1'b1), + .CEAD(1'b0), + .CEALUMODE(1'b1), + .CEB1(1'b1), + .CEB2(1'b1), + .CEC(1'b1), + .CECARRYIN(1'b1), + .CECTRL(1'b1), + .CED(1'b0), + .CEM(1'b1), + .CEP(1'b1), + .RSTA(rst), + .RSTALLCARRYIN(rst), + .RSTALUMODE(rst), + .RSTB(rst), + .RSTC(rst), + .RSTCTRL(rst), + .RSTD(rst), + .RSTM(rst), + .RSTP(rst) + ); + + assign pwr_4 = dsp_pout_4[35:4]; + + + // ---------------------------------- + // Log2 computation and normalization + // ---------------------------------- + // When shifting, instead of zero filling, we fill with RNG data + // Again, this helps reduce the visible quantization effects + // for very low power values. 
+ + // Normalization ladder: each stage ORs the top 16/8/4/2/1 bits of the + // running power value. If any of them is set the value is kept and the + // matching log2 bit is 1; otherwise the value is shifted left by that + // amount, the vacated LSBs are filled from rng, and the log2 bit is 0. + + // First stage + assign msb_check[4] = |(pwr_4[31:16]); + + always @(posedge clk) + begin + if (msb_check[4]) + pwr_5 <= pwr_4; + else + pwr_5 <= { pwr_4[15:0], rng[31:16] }; + + log2_5 <= { msb_check[4], 4'b0000 }; + end + + // Second stage + assign msb_check[3] = |(pwr_5[31:24]); + + always @(posedge clk) + begin + if (msb_check[3]) + pwr_6 <= pwr_5; + else + pwr_6 <= { pwr_5[23:0], rng[15:8] }; + + log2_6 <= { log2_5[4], msb_check[3], 3'b000 }; + end + + // Third stage + assign msb_check[2] = |(pwr_6[31:28]); + + always @(posedge clk) + begin + if (msb_check[2]) + pwr_7 <= pwr_6; + else + pwr_7 <= { pwr_6[27:0], rng[7:4] }; + + log2_7 <= { log2_6[4:3], msb_check[2], 2'b00 }; + end + + // Fourth stage + assign msb_check[1] = |(pwr_7[31:30]); + + always @(posedge clk) + begin + if (msb_check[1]) + pwr_8 <= pwr_7; + else + pwr_8 <= { pwr_7[29:0], rng[3:2] }; + + log2_8 <= { log2_7[4:2], msb_check[1], 1'b0 }; + end + + // Final stage + assign msb_check[0] = pwr_8[31]; + + always @(posedge clk) + begin + if (msb_check[0]) + pwr_9 <= pwr_8; + else + pwr_9 <= { pwr_8[30:0], rng[1] }; + + // All five log2 bits are now known + log2_9 <= { log2_8[4:1], msb_check[0] }; + end + + + // ---------- + // LUT lookup + // ---------- + + // Address mapping + assign lut_addr_9 = { 1'b0, pwr_9[30:20], 4'h0 }; + + // Actual LUT + RAMB36E1 #( + .RDADDR_COLLISION_HWCONFIG("PERFORMANCE"), + .SIM_COLLISION_CHECK("NONE"), + .DOA_REG(1), + .DOB_REG(1), + .EN_ECC_READ("FALSE"), + .EN_ECC_WRITE("FALSE"), + .INIT_00(256'h02b202840256022801fa01cd019f01710143011500e700b8008a005c002e0000), + .INIT_01(256'h058c055f0531050404d604a9047b044e042003f203c503970369033b030e02e0), + .INIT_02(256'h08610834080707da07ad077f0752072506f806ca069d06700642061505e705ba), + .INIT_03(256'h0b310b040ad70aaa0a7d0a500a2409f709ca099d09700943091608e908bc088e), + .INIT_04(256'h0dfb0dce0da20d750d490d1c0cef0cc30c960c6a0c3d0c100be40bb70b8a0b5d), +
.INIT_05(256'h10bf10931067103b100e0fe20fb60f8a0f5d0f310f050ed90eac0e800e530e27), + .INIT_06(256'h137e1353132712fb12cf12a31277124b121f11f311c7119b116f1143111710eb), + .INIT_07(256'h1639160d15e215b6158a155f1533150814dc14b014851459142d140213d613aa), + .INIT_08(256'h18ed18c21897186c1841181517ea17bf17941768173d171216e616bb168f1664), + .INIT_09(256'h1b9d1b731b481b1d1af21ac71a9c1a711a461a1b19f019c5199a196f19441919), + .INIT_0A(256'h1e481e1e1df31dc91d9e1d741d491d1e1cf41cc91c9e1c731c491c1e1bf31bc8), + .INIT_0B(256'h20ee20c4209a20702045201b1ff11fc61f9c1f721f471f1d1ef21ec81e9d1e73), + .INIT_0C(256'h23902366233c231222e822be2294226a2240221621ec21c12197216d21432119), + .INIT_0D(256'h262c260325d925af2586255c2532250824df24b5248b24612437240d23e423ba), + .INIT_0E(256'h28c4289b28712848281e27f527cc27a22779274f272626fc26d326a9267f2656), + .INIT_0F(256'h2b572b2e2b052adc2ab32a8a2a612a372a0e29e529bc299229692940291728ed), + .INIT_10(256'h2de62dbd2d942d6b2d432d1a2cf12cc82c9f2c762c4d2c242bfb2bd22ba92b80), + .INIT_11(256'h30703047301f2ff62fce2fa52f7d2f542f2b2f032eda2eb12e892e602e372e0f), + .INIT_12(256'h32f632cd32a5327d3255322c320431dc31b3318b3162313a311230e930c13098), + .INIT_13(256'h3577354f352734ff34d734af3487345f3437340f33e733be3396336e3346331e), + .INIT_14(256'h37f437cc37a4377d3755372d370536de36b6368e3666363e361635ef35c7359f), + .INIT_15(256'h3a6c3a453a1e39f639cf39a7398039583931390938e238ba3892386b3843381b), + .INIT_16(256'h3ce13cba3c933c6b3c443c1d3bf63bcf3ba73b803b593b313b0a3ae33abb3a94), + .INIT_17(256'h3f513f2a3f033edd3eb63e8f3e683e413e1a3df33dcc3da53d7d3d563d2f3d08), + .INIT_18(256'h41be41974170414a412340fc40d540af40884061403a40143fed3fc63f9f3f78), + .INIT_19(256'h442643ff43d943b3438c4366433f431942f242cc42a5427f42584231420b41e4), + .INIT_1A(256'h468a4664463e461745f145cb45a5457f45584532450c44e544bf44994472444c), + .INIT_1B(256'h48ea48c4489e48784853482d480747e147bb4795476f4748472246fc46d646b0), + .INIT_1C(256'h4b474b214afb4ad64ab04a8a4a644a3f4a1949f349cd49a84982495c49364910), + 
.INIT_1D(256'h4d9f4d7a4d544d2f4d094ce44cbe4c994c734c4e4c284c034bdd4bb84b924b6c), + .INIT_1E(256'h4ff44fcf4faa4f844f5f4f3a4f154eef4eca4ea54e7f4e5a4e354e0f4dea4dc5), + .INIT_1F(256'h5245522051fb51d651b1518c51675142511d50f850d350ae50895063503e5019), + .INIT_20(256'h5492546e5449542453ff53da53b65391536c5347532252fd52d952b4528f526a), + .INIT_21(256'h56dc56b75693566e564a5625560155dc55b85593556e554a5525550054dc54b7), + .INIT_22(256'h592258fe58d958b55891586c5848582457ff57db57b75792576e574957255700), + .INIT_23(256'h5b645b405b1c5af85ad45ab05a8c5a685a445a2059fb59d759b3598f596a5946), + .INIT_24(256'h5da35d805d5c5d385d145cf05ccc5ca85c845c605c3d5c195bf55bd15bad5b89), + .INIT_25(256'h5fdf5fbb5f985f745f505f2d5f095ee55ec25e9e5e7a5e565e335e0f5deb5dc7), + .INIT_26(256'h621761f461d061ad618961666142611f60fb60d860b46091606d604a60266003), + .INIT_27(256'h644c6429640563e263bf639c637863556332630f62eb62c862a56281625e623a), + .INIT_28(256'h667d665a6637661465f165ce65ab658865656542651f64fc64d864b56492646f), + .INIT_29(256'h68ab688868666843682067fd67da67b767946772674f672c670966e666c366a0), + .INIT_2A(256'h6ad66ab36a916a6e6a4b6a296a0669e469c1699e697b69596936691368f168ce), + .INIT_2B(256'h6cfd6cdb6cb96c966c746c516c2f6c0c6bea6bc76ba56b836b606b3d6b1b6af8), + .INIT_2C(256'h6f226eff6edd6ebb6e996e776e546e326e106dee6dcb6da96d876d646d426d20), + .INIT_2D(256'h7143712170ff70dd70bb709970777055703370116fee6fcc6faa6f886f666f44), + .INIT_2E(256'h7361733f731d72fb72da72b87296727472527230720e71ec71cb71a971877165), + .INIT_2F(256'h757c755a7539751774f574d474b27490746f744d742b740a73e873c673a47383), + .INIT_30(256'h779477727751772f770e76ec76cb76aa7688766776457624760275e075bf759d), + .INIT_31(256'h79a87987796679457924790278e178c0789e787d785c783a781977f877d677b5), + .INIT_32(256'h7bba7b997b787b577b367b157af47ad37ab27a917a707a4e7a2d7a0c79eb79ca), + .INIT_33(256'h7dc97da87d887d677d467d257d047ce37cc27ca17c807c5f7c3e7c1d7bfc7bdb), + .INIT_34(256'h7fd57fb57f947f737f537f327f117ef07ed07eaf7e8e7e6d7e4d7e2c7e0b7dea), + 
.INIT_35(256'h81de81be819d817d815c813c811b80fb80da80ba809980788058803780177ff6), + .INIT_36(256'h83e583c483a48384836383438323830282e282c182a1828182608240821f81ff), + .INIT_37(256'h85e885c885a88588856785478527850784e784c684a684868466844584258405), + .INIT_38(256'h87e987c987a98789876987498729870986e986c986a986898668864886288608), + .INIT_39(256'h89e789c789a78987896789488928890888e888c888a888888868884888298809), + .INIT_3A(256'h8be28bc28ba28b838b638b438b248b048ae48ac58aa58a858a668a468a268a06), + .INIT_3B(256'h8dda8dbb8d9b8d7c8d5c8d3d8d1d8cfe8cde8cbf8c9f8c808c608c408c218c01), + .INIT_3C(256'h8fd08fb18f918f728f538f338f148ef58ed58eb68e978e778e588e388e198dfa), + .INIT_3D(256'h91c391a49185916691469127910890e990ca90ab908b906c904d902e900e8fef), + .INIT_3E(256'h93b39394937693579338931992fa92db92bc929c927d925e923f9220920191e2), + .INIT_3F(256'h95a19583956495459526950794e894ca94ab948c946d944e942f941093f193d2), + .INIT_40(256'h978d976e974f9731971296f396d596b696979679965a963b961c95fe95df95c0), + .INIT_41(256'h997599579938991a98fb98dd98be98a09881986398449826980797e897ca97ab), + .INIT_42(256'h9b5c9b3d9b1f9b019ae29ac49aa69a879a699a4a9a2c9a0e99ef99d199b29994), + .INIT_43(256'h9d3f9d219d039ce59cc79ca99c8a9c6c9c4e9c309c119bf39bd59bb79b989b7a), + .INIT_44(256'h9f219f039ee59ec79ea99e8b9e6d9e4f9e309e129df49dd69db89d9a9d7c9d5e), + .INIT_45(256'ha100a0e2a0c4a0a6a088a06aa04ca02ea0119ff39fd59fb79f999f7b9f5d9f3f), + .INIT_46(256'ha2dca2bea2a1a283a265a247a22aa20ca1eea1d0a1b3a195a177a159a13ba11e), + .INIT_47(256'ha4b6a499a47ba45ea440a422a405a3e7a3c9a3aca38ea371a353a335a318a2fa), + .INIT_48(256'ha68ea671a653a636a618a5fba5dda5c0a5a2a585a567a54aa52ca50fa4f1a4d4), + .INIT_49(256'ha863a846a829a80ba7eea7d1a7b4a796a779a75ca73ea721a703a6e6a6c9a6ab), + .INIT_4A(256'haa36aa19a9fca9dfa9c2a9a5a987a96aa94da930a913a8f5a8d8a8bba89ea881), + .INIT_4B(256'hac07abeaabcdabb0ab93ab76ab59ab3cab1fab02aae5aac8aaabaa8eaa71aa53), + .INIT_4C(256'hadd6adb9ad9cad7fad62ad45ad28ad0cacefacd2acb5ac98ac7bac5eac41ac24), + 
.INIT_4D(256'hafa2af85af68af4caf2faf12aef5aed9aebcae9fae82ae66ae49ae2cae0fadf2), + .INIT_4E(256'hb16cb14fb133b116b0fab0ddb0c0b0a4b087b06ab04eb031b015aff8afdbafbf), + .INIT_4F(256'hb334b317b2fbb2deb2c2b2a5b289b26cb250b233b217b1fab1deb1c1b1a5b188), + .INIT_50(256'hb4f9b4ddb4c1b4a4b488b46cb44fb433b417b3fab3deb3c2b3a5b389b36cb350), + .INIT_51(256'hb6bdb6a1b684b668b64cb630b614b5f7b5dbb5bfb5a3b587b56ab54eb532b515), + .INIT_52(256'hb87eb862b846b82ab80eb7f2b7d6b7bab79eb781b765b749b72db711b6f5b6d9), + .INIT_53(256'hba3dba21ba05b9e9b9ceb9b2b996b97ab95eb942b926b90ab8eeb8d2b8b6b89a), + .INIT_54(256'hbbfabbdebbc3bba7bb8bbb6fbb54bb38bb1cbb00bae4bac8baadba91ba75ba59), + .INIT_55(256'hbdb5bd9abd7ebd62bd47bd2bbd0fbcf4bcd8bcbcbca1bc85bc69bc4dbc32bc16), + .INIT_56(256'hbf6ebf53bf37bf1cbf00bee5bec9beadbe92be76be5bbe3fbe24be08bdecbdd1), + .INIT_57(256'hc125c10ac0eec0d3c0b7c09cc081c065c04ac02ec013bff7bfdcbfc1bfa5bf8a), + .INIT_58(256'hc2dac2bfc2a3c288c26dc251c236c21bc200c1e4c1c9c1aec192c177c15cc140), + .INIT_59(256'hc48dc472c456c43bc420c405c3eac3cfc3b3c398c37dc362c347c32bc310c2f5), + .INIT_5A(256'hc63dc622c607c5ecc5d1c5b6c59bc580c565c54ac52fc514c4f9c4dec4c3c4a8), + .INIT_5B(256'hc7ecc7d1c7b7c79cc781c766c74bc730c715c6fac6dfc6c4c6a9c68ec673c658), + .INIT_5C(256'hc999c97ec964c949c92ec913c8f9c8dec8c3c8a8c88dc873c858c83dc822c807), + .INIT_5D(256'hcb44cb2acb0fcaf4cadacabfcaa4ca8aca6fca54ca3aca1fca04c9e9c9cfc9b4), + .INIT_5E(256'hccedccd3ccb8cc9ecc83cc69cc4ecc34cc19cbfecbe4cbc9cbafcb94cb79cb5f), + .INIT_5F(256'hce94ce7ace60ce45ce2bce10cdf6cddccdc1cda7cd8ccd72cd57cd3dcd22cd08), + .INIT_60(256'hd03ad01fd005cfebcfd1cfb6cf9ccf82cf67cf4dcf33cf18cefecee4cec9ceaf), + .INIT_61(256'hd1ddd1c3d1a9d18fd174d15ad140d126d10cd0f1d0d7d0bdd0a3d088d06ed054), + .INIT_62(256'hd37fd365d34bd330d316d2fcd2e2d2c8d2aed294d27ad260d246d22cd211d1f7), + .INIT_63(256'hd51ed504d4ead4d1d4b7d49dd483d469d44fd435d41bd401d3e7d3cdd3b3d399), + .INIT_64(256'hd6bcd6a2d689d66fd655d63bd621d607d5eed5d4d5bad5a0d586d56cd552d538), + 
.INIT_65(256'hd858d83fd825d80bd7f1d7d8d7bed7a4d78bd771d757d73dd723d70ad6f0d6d6), + .INIT_66(256'hd9f3d9d9d9bfd9a6d98cd973d959d93fd926d90cd8f2d8d9d8bfd8a5d88cd872), + .INIT_67(256'hdb8bdb72db58db3fdb25db0cdaf2dad9dabfdaa6da8cda72da59da3fda26da0c), + .INIT_68(256'hdd22dd09dcefdcd6dcbcdca3dc8adc70dc57dc3ddc24dc0adbf1dbd8dbbedba5), + .INIT_69(256'hdeb7de9ede84de6bde52de39de1fde06ddedddd3ddbadda1dd87dd6edd55dd3b), + .INIT_6A(256'he04ae031e018dfffdfe6dfccdfb3df9adf81df68df4edf35df1cdf03dee9ded0), + .INIT_6B(256'he1dce1c3e1aae191e178e15fe145e12ce113e0fae0e1e0c8e0afe096e07de063), + .INIT_6C(256'he36ce353e33ae321e308e2efe2d6e2bde2a4e28be272e259e240e227e20ee1f5), + .INIT_6D(256'he4fae4e1e4c8e4afe497e47ee465e44ce433e41ae401e3e8e3cfe3b7e39ee385), + .INIT_6E(256'he686e66ee655e63ce624e60be5f2e5d9e5c0e5a8e58fe576e55de544e52ce513), + .INIT_6F(256'he811e7f9e7e0e7c7e7afe796e77de765e74ce733e71be702e6e9e6d1e6b8e69f), + .INIT_70(256'he99be982e96ae951e938e920e907e8efe8d6e8bee8a5e88ce874e85be843e82a), + .INIT_71(256'heb22eb0aeaf1ead9eac0eaa8ea90ea77ea5fea46ea2eea15e9fde9e4e9cce9b3), + .INIT_72(256'heca8ec90ec78ec5fec47ec2eec16ebfeebe5ebcdebb5eb9ceb84eb6beb53eb3b), + .INIT_73(256'hee2dee14edfcede4edccedb3ed9bed83ed6bed52ed3aed22ed09ecf1ecd9ecc1), + .INIT_74(256'hefafef97ef7fef67ef4fef37ef1fef06eeeeeed6eebeeea6ee8dee75ee5dee45), + .INIT_75(256'hf131f119f101f0e8f0d0f0b8f0a0f088f070f058f040f028f010eff8efe0efc8), + .INIT_76(256'hf2b0f298f280f268f251f239f221f209f1f1f1d9f1c1f1a9f191f179f161f149), + .INIT_77(256'hf42ef417f3fff3e7f3cff3b7f39ff387f370f358f340f328f310f2f8f2e0f2c8), + .INIT_78(256'hf5abf593f57bf564f54cf534f51cf505f4edf4d5f4bdf4a5f48ef476f45ef446), + .INIT_79(256'hf726f70ef6f7f6dff6c7f6b0f698f680f669f651f639f622f60af5f2f5daf5c3), + .INIT_7A(256'hf8a0f888f870f859f841f82af812f7fbf7e3f7cbf7b4f79cf785f76df755f73e), + .INIT_7B(256'hfa18fa00f9e9f9d1f9baf9a2f98bf973f95cf944f92df915f8fef8e6f8cff8b7), + .INIT_7C(256'hfb8efb77fb5ffb48fb31fb19fb02faeafad3fabcfaa4fa8dfa75fa5efa46fa2f), + 
.INIT_7D(256'hfd03fcecfcd5fcbdfca6fc8ffc77fc60fc49fc32fc1afc03fbecfbd4fbbdfba5), + .INIT_7E(256'hfe77fe60fe48fe31fe1afe03fdecfdd4fdbdfda6fd8ffd77fd60fd49fd32fd1a), + .INIT_7F(256'hffe9ffd2ffbbffa4ff8dff75ff5eff47ff30ff19ff02feebfed3febcfea5fe8e), + .INIT_A(36'h000000000), + .INIT_B(36'h000000000), + .INIT_FILE("NONE"), + .RAM_MODE("TDP"), + .RAM_EXTENSION_A("NONE"), + .RAM_EXTENSION_B("NONE"), + .READ_WIDTH_A(18), + .READ_WIDTH_B(0), + .WRITE_WIDTH_A(0), + .WRITE_WIDTH_B(36), // the RAMB36E1 model fails without this + .RSTREG_PRIORITY_A("RSTREG"), + .RSTREG_PRIORITY_B("RSTREG"), + .SRVAL_A(36'h000000000), + .SRVAL_B(36'h000000000), + .SIM_DEVICE("7SERIES"), + .WRITE_MODE_A("READ_FIRST"), + .WRITE_MODE_B("READ_FIRST") + ) + log_lut_I ( + .DOADO(lut_do_11), + .CASCADEINA(1'b0), + .CASCADEINB(1'b0), + .INJECTDBITERR(1'b0), + .INJECTSBITERR(1'b0), + .ADDRARDADDR(lut_addr_9), + .CLKARDCLK(clk), + .ENARDEN(1'b1), + .REGCEAREGCE(1'b1), + .RSTRAMARSTRAM(rst), + .RSTREGARSTREG(rst), + .WEA(4'h0), + .DIADI(32'h00000000), + .DIPADIP(4'h0), + .ADDRBWRADDR(16'h0000), + .CLKBWRCLK(1'b0), + .ENBWREN(1'b0), + .REGCEB(1'b0), + .RSTRAMB(1'b0), + .RSTREGB(1'b0), + .WEBWE(8'h0), + .DIBDI(32'h00000000), + .DIPBDIP(4'h0) + ); + + // LSBs mapping + assign msb_9 = pwr_9[31]; + assign lsbs_9 = pwr_9[19:15]; + + // Delay lines to compensate for LUT delay + delay_bit #(2) dl_msb (msb_9, msb_11, clk); + delay_bus #(2, 5) dl_lsbs (lsbs_9, lsbs_11, clk); + delay_bus #(2, 5) dl_log2 (log2_9, log2_11, clk); + + + // ----------- + // Final value + // ----------- + + // Final add & saturation + always @(posedge clk) + begin + if (!msb_11) + final_12 <= 16'h0000; + else + final_12 <= { log2_11, lut_do_11[15:0] } + lsbs_11; + end + + // Mapping + assign out_12 = final_12[20:5]; + +endmodule // f15_logpwr diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_maxhold.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_maxhold.v new file mode 100644 index 000000000..ecd92adfb --- /dev/null +++ 
b/fpga/usrp3/lib/rfnoc/fosphor/f15_maxhold.v @@ -0,0 +1,71 @@ +/* + * f15_maxhold.v + * + * Computes the max hold (with epsilon decay) + * + * Copyright (C) 2015 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_maxhold #( + parameter integer Y_WIDTH = 12, + parameter integer X_WIDTH = 16, + parameter integer FRAC_WIDTH = 8 +)( + input wire [Y_WIDTH-1:0] yin_0, + input wire [X_WIDTH-1:0] x_0, + input wire [15:0] rng_0, + input wire [15:0] epsilon_0, + input wire clear_0, + output wire [Y_WIDTH-1:0] yout_4, + input wire clk, + input wire rst +); + + localparam integer I_WIDTH = X_WIDTH + FRAC_WIDTH; + + // Signals + reg [X_WIDTH-1:0] x_1; + reg [I_WIDTH :0] y_1; + reg [Y_WIDTH :0] d_1; + reg clear_1; + + reg [Y_WIDTH-1:0] y_2; + + // Stage 1 + always @(posedge clk) + begin + x_1 <= x_0; + y_1 <= { 1'b0, yin_0, rng_0[I_WIDTH-Y_WIDTH-1:0] } - epsilon_0; + d_1 <= { 1'b0, yin_0 } - { 1'b0, x_0[X_WIDTH-1:X_WIDTH-Y_WIDTH] }; + clear_1 <= clear_0; + end + + // Stage 2 + always @(posedge clk) + begin + if (clear_1) + y_2 <= 0; + else if (d_1[Y_WIDTH]) + // x is larger, use this + y_2 <= x_1[X_WIDTH-1:X_WIDTH-Y_WIDTH]; + else + // y is larger, take old y with small decay + if (y_1[I_WIDTH]) + y_2 <= 0; + else + y_2 <= y_1[I_WIDTH-1:I_WIDTH-Y_WIDTH]; + end + + // Apply two more delay to match the avg block + delay_bus #(2, Y_WIDTH) dl_y (y_2, yout_4, clk); + +endmodule // f15_maxhold diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_packetizer.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_packetizer.v new file mode 100644 index 000000000..d8ff2ae34 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_packetizer.v @@ -0,0 +1,136 @@ +/* + * f15_packetizer.v + * + * Copyright (C) 2015 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: 
LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_packetizer #( + parameter integer BIN_WIDTH = 6, + parameter integer DECIM_WIDTH = 10 +)( + input wire [BIN_WIDTH-1:0] in_bin_addr, + input wire in_bin_last, + input wire [7:0] in_histo, + input wire [7:0] in_spectra_max, + input wire [7:0] in_spectra_avg, + input wire in_last, + input wire in_valid, + + output reg [31:0] out_data, + output reg out_last, + output reg out_eob, + output reg out_valid, + + input wire [DECIM_WIDTH-1:0] cfg_decim, + input wire cfg_decim_changed, + + input wire clk, + input wire rst +); + + // FSM + localparam + ST_WAIT = 0, + ST_SEND_HISTO = 1, + ST_SEND_MAX = 2, + ST_SEND_AVG = 3; + + reg [1:0] state; + + // Signals + reg [DECIM_WIDTH:0] decim_cnt; + reg [1:0] bcnt; + + // 1-in-N decimation counter + always @(posedge clk) + begin + if (rst) + decim_cnt <= 0; + else if (cfg_decim_changed) + // Force Reload + decim_cnt <= { 1'b0, cfg_decim }; + else if (in_valid & in_bin_last & in_last) + if (decim_cnt[DECIM_WIDTH]) + // Reload + decim_cnt <= { 1'b0, cfg_decim }; + else + // Just decrement + decim_cnt <= decim_cnt - 1; + end + + // FSM + always @(posedge clk) + begin + if (rst) + state <= ST_WAIT; + else if (in_valid & in_last) + case (state) + ST_WAIT: + if (in_bin_last & decim_cnt[DECIM_WIDTH]) + state <= ST_SEND_HISTO; + + ST_SEND_HISTO: + if (in_bin_last) + state <= ST_SEND_MAX; + + ST_SEND_MAX: + state <= ST_SEND_AVG; + + ST_SEND_AVG: + state <= ST_WAIT; + endcase + end + + // Byte counter + always @(posedge clk) + begin + if (rst) + bcnt <= 2'b00; + else if (in_valid) + if (in_last | (bcnt == 2'b11)) + bcnt <= 2'b00; + else + bcnt <= bcnt + 1; + end + + // Input mux & shift register + always @(posedge clk) + begin + if (in_valid) + begin + // Shift + out_data[31:8] <= out_data[23:0]; + + // New LSBs + case (state) + ST_SEND_HISTO: out_data[7:0] <= in_histo; + ST_SEND_MAX: out_data[7:0] <= in_spectra_max; + ST_SEND_AVG: 
out_data[7:0] <= in_spectra_avg; + endcase + end + end + + // Output last, eob, valid + always @(posedge clk) + begin + if (rst) begin + out_last <= 1'b0; + out_eob <= 1'b0; + out_valid <= 1'b0; + end else begin + out_last <= in_last; + out_eob <= (state == ST_SEND_AVG); + out_valid <= in_valid & (in_last | bcnt == 2'b11) & (state != ST_WAIT); + end + end + +endmodule // f15_packetizer diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_rise_decay.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_rise_decay.v new file mode 100644 index 000000000..aeba14c7f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_rise_decay.v @@ -0,0 +1,160 @@ +/* + * f15_rise_decay.v + * + * Applies the rise or decay to a given value. + * + * Copyright (C) 2014 Ettus Corporation LLC + * Copyright 2018 Ettus Research, a National Instruments Company + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_rise_decay #( + parameter integer WIDTH = 9 +)( + input wire [WIDTH-1:0] in_0, // input + output reg [WIDTH-1:0] out_5, // output + input wire [15:0] k_0, // time constant + input wire ena_0, // If ena=0, then output original value + input wire mode_0, // 0=rise, 1=decay + input wire [15:0] rng, + input wire clk, + input wire rst +); + + // Signals + reg mode_1; + reg [4:0] inmode_1; + + wire [WIDTH-1:0] in_2; + wire ena_2; + wire [6:0] opmode_2; + reg [3:0] alumode_2; + + wire [47:0] pout_4; + wire pmatch_4; + + + // Main DSP + // -------- + + // Mode control (matches the assignments below) + // For rise (mode=0) we have INMODE=01100 (A=D-A2, B=B2), ALUMODE=0000 (C+M) + // For decay (mode=1) we have INMODE=00000 (A=A2, B=B2), ALUMODE=0011 (C-M) + // i.e. rise adds k * (D - in) to C, decay subtracts k * in from C. + always @(posedge clk) + begin + mode_1 <= mode_0; + + if (mode_0) + inmode_1 <= 5'b00000; + else + inmode_1 <= 5'b01100; + + if (mode_1) + alumode_2 <= 4'b0011; + else + alumode_2 <= 4'b0000; + end + + // When not enabled, we use OPMODE to do pass-through + delay_bit #(2) dl_ena (ena_0, ena_2, clk); + assign opmode_2 = ena_2 ?
7'b0110101 : 7'b0110000; + + // Delay for input to C + delay_bus #(2, WIDTH) dl_in (in_0, in_2, clk); + + // Instance + DSP48E1 #( + .A_INPUT("DIRECT"), + .B_INPUT("DIRECT"), + .USE_DPORT("TRUE"), + .USE_MULT("MULTIPLY"), + .AUTORESET_PATDET("NO_RESET"), + .MASK({1'b1, {(31-WIDTH){1'b0}}, {(WIDTH+16){1'b1}}}), + .PATTERN(48'h000000000000), + .SEL_MASK("MASK"), + .SEL_PATTERN("PATTERN"), + .USE_PATTERN_DETECT("PATDET"), + .ACASCREG(1), + .ADREG(1), + .ALUMODEREG(1), + .AREG(1), + .BCASCREG(2), + .BREG(2), + .CARRYINREG(1), + .CARRYINSELREG(1), + .CREG(1), + .DREG(1), + .INMODEREG(1), + .MREG(1), + .OPMODEREG(1), + .PREG(1), + .USE_SIMD("ONE48") + ) + dsp_exp_I ( + .PATTERNDETECT(pmatch_4), + .P(pout_4), + .ACIN(30'h0), + .BCIN(18'h0), + .CARRYCASCIN(1'h0), + .MULTSIGNIN(1'h0), + .PCIN(48'h000000000000), + .ALUMODE(alumode_2), + .CARRYINSEL(3'h0), + .CEINMODE(1'b1), + .CLK(clk), + .INMODE(inmode_1), + .OPMODE(opmode_2), + .RSTINMODE(rst), + .A({{(30-WIDTH){1'b0}}, in_0}), + .B({ 2'h0, k_0}), + .C({{(32-WIDTH){1'b0}}, in_2, rng}), + .CARRYIN(1'b0), + .D({{(24-WIDTH){1'b0}}, 1'b1, {WIDTH{1'b0}}}), + .CEA1(1'b0), + .CEA2(1'b1), + .CEAD(1'b1), + .CEALUMODE(1'b1), + .CEB1(1'b1), + .CEB2(1'b1), + .CEC(1'b1), + .CECARRYIN(1'b1), + .CECTRL(1'b1), + .CED(1'b1), + .CEM(1'b1), + .CEP(1'b1), + .RSTA(rst), + .RSTALLCARRYIN(rst), + .RSTALUMODE(rst), + .RSTB(rst), + .RSTC(rst), + .RSTCTRL(rst), + .RSTD(rst), + .RSTM(rst), + .RSTP(rst) + ); + + + // Saturation + // ---------- + // pout_4[47] set : result is negative, clamp to zero. + // pmatch_4 clear : with PATTERN=0 and the MASK above (mask bits set to 1 are + // ignored by the DSP48E1 pattern detector), some bit in + // P[46:WIDTH+16] is non-zero, so clamp to all ones. + // Otherwise the result is the WIDTH bits just above the 16 fractional bits. + + always @(posedge clk) + begin + if (rst == 1) + out_5 <= 0; + else + if (pout_4[47] == 1) + out_5 <= {WIDTH{1'b0}}; + else if (pmatch_4 == 0) + out_5 <= {WIDTH{1'b1}}; + else + out_5 <= pout_4[WIDTH+15:16]; + end + +endmodule // f15_rise_decay diff --git a/fpga/usrp3/lib/rfnoc/fosphor/f15_wf_agg.v b/fpga/usrp3/lib/rfnoc/fosphor/f15_wf_agg.v new file mode 100644 index 000000000..7b12bc4d5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/f15_wf_agg.v @@ -0,0 +1,189 @@ +/* + * f15_wf_agg.v + * + * Waterfall
Aggregation + * + * Copyright (C) 2016 Ettus Corporation LLC + * + * vim: ts=4 sw=4 + */ + +`ifdef SIM +`default_nettype none +`endif + +module f15_wf_agg #( + parameter integer Y_WIDTH = 12, + parameter integer X_WIDTH = 16, + parameter integer DECIM_WIDTH = 8 +)( + input wire [Y_WIDTH-1:0] yin_0, + input wire [X_WIDTH-1:0] x_0, + input wire valid_0, + input wire last_0, + input wire [15:0] rng_0, + + output wire [Y_WIDTH-1:0] yout_3, + output wire [7:0] zout_3, + output wire zvalid_3, + + input wire [1:0] cfg_div, + input wire cfg_mode, // 0=MaxHold, 1=Average + input wire [DECIM_WIDTH-1:0] cfg_decim, + input wire cfg_decim_changed, + + input wire clk, + input wire rst +); + + localparam integer R_WIDTH = X_WIDTH + 9; + + // Signals + // Data path + reg [R_WIDTH-1:0] xe_1; + reg [R_WIDTH-1:0] ye_1; + + wire over_2; + reg [R_WIDTH-1:0] r_2; + reg [Y_WIDTH-1:0] x_2; + reg [Y_WIDTH-1:0] y_2; + + reg [Y_WIDTH-1:0] y_3; + + // Control + reg [DECIM_WIDTH:0] decim_cnt; + reg init_0; + wire init_2; + reg init_force_0; + reg flush_0; + reg zvalid_1; + + + // Datapath + // -------- + + // X predivision mux + always @(posedge clk) + begin + case (cfg_div) + 2'b00: + xe_1 <= { 1'd0, x_0, 8'd0 }; // 1:1 + + 2'b01: + xe_1 <= { 4'd0, x_0, 5'd0 }; // 1:8 + + 2'b10: + xe_1 <= { 7'd0, x_0, 2'd0 }; // 1:64 + + 2'b11: + xe_1 <= { 9'd0, x_0 }; // 1:256 + endcase + end + + // Y register + always @(posedge clk) + begin + if (cfg_mode) + // Average + ye_1 <= { 1'b0, yin_0, rng_0[R_WIDTH-Y_WIDTH-2:0] }; + else + // Max Hold + ye_1 <= { 1'b0, yin_0, {(R_WIDTH-Y_WIDTH-1){1'b0}} }; + end + + // Adder / Subtractor + always @(posedge clk) + begin + if (cfg_mode) + // Average + r_2 <= ye_1 + xe_1; + else + // Max-Hold + r_2 <= ye_1 - xe_1; + end + + assign over_2 = r_2[R_WIDTH-1]; + + // Registers for the two branches.
+ always @(posedge clk) + begin + x_2 <= xe_1[R_WIDTH-2:R_WIDTH-Y_WIDTH-1]; + y_2 <= ye_1[R_WIDTH-2:R_WIDTH-Y_WIDTH-1]; + end + + // Output mux + always @(posedge clk) + begin + // "overflow" below is over_2, the MSB of r_2: the carry out of the sum + // when averaging, the borrow of (y - x) when max-holding. + // If first : take x_2 + // If average : + // - If overflow = 0 -> take r_2 + // - If overflow = 1 -> saturate to all 1's + // If max-hold + // - If overflow = 0 -> take y_2 + // - If overflow = 1 -> take x_2 + if (init_2) + y_3 <= x_2; + else if (cfg_mode) + y_3 <= over_2 ? { (Y_WIDTH){1'b1} } : r_2[R_WIDTH-2:R_WIDTH-Y_WIDTH-1]; + else + y_3 <= over_2 ? x_2 : y_2; + end + + assign yout_3 = y_3; + assign zout_3 = y_3[Y_WIDTH-1:Y_WIDTH-8]; + + + // Control + // ------- + + // 1-in-N decimation counter + always @(posedge clk) + begin + if (rst) + decim_cnt <= 0; + else if (cfg_decim_changed) + // Force Reload + decim_cnt <= { 1'b0, cfg_decim }; + else if (valid_0 & last_0) + if (decim_cnt[DECIM_WIDTH]) + // Reload + decim_cnt <= { 1'b0, cfg_decim }; + else + // Just decrement + decim_cnt <= decim_cnt - 1; + end + + // Decimation flush & init states + always @(posedge clk) + begin + if (rst) begin + // Initial state + flush_0 <= 1'b0; + init_0 <= 1'b1; + init_force_0 <= 1'b0; + end else begin + if (valid_0 & last_0) begin + // Flushing + flush_0 <= decim_cnt[DECIM_WIDTH]; + + // Init after flush or if forced + init_0 <= flush_0 | init_force_0; + end + + // Init forcing after a decim change + if (cfg_decim_changed) + init_force_0 <= 1'b1; + else if (valid_0 & last_0) + init_force_0 <= 1'b0; + end + end + + delay_bit #(2) dl_init(init_0, init_2, clk); + + // Z-output valid + always @(posedge clk) + zvalid_1 <= valid_0 & flush_0; + + delay_bit #(2) dl_zvalid(zvalid_1, zvalid_3, clk); + +endmodule // f15_wf_agg diff --git a/fpga/usrp3/lib/rfnoc/fosphor/fifo_srl.v b/fpga/usrp3/lib/rfnoc/fosphor/fifo_srl.v new file mode 100644 index 000000000..700da18d3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/fosphor/fifo_srl.v @@ -0,0 +1,169 @@ +/* + * fifo_srl.v + * + * Very small/light-weight FIFO using SRL.
+ * Only for synchronous design. The SRL holds at most 15, 31 or 63 entries
+ * (LOG2_DEPTH = 4, 5 or 6) and the FIFO always works in the so-called
+ * first-word-fall-thru mode.
+ *
+ * Copyright (C) 2014 Ettus Corporation LLC
+ * Copyright 2018 Ettus Research, a National Instruments Company
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * vim: ts=4 sw=4
+ */
+
+`ifdef SIM
+`default_nettype none
+`endif
+
+// Ports:
+//   di / wren    : write data and write strobe
+//   full         : SRL address is at its maximum usable value
+//   afull        : SRL address >= AFULL_LEVEL (tied low when AFULL_LEVEL == -1)
+//   do           : output register, reloaded from the SRL on every internal read
+//   rden         : read strobe
+//   empty        : output register holds no valid word
+//
+// Only LOG2_DEPTH values 4, 5 and 6 instantiate storage; for any other value
+// the generate block below produces nothing and srl_q is left undriven.
+module fifo_srl #(
+    parameter integer WIDTH = 4,
+    parameter integer LOG2_DEPTH = 5, // 4, 5 or 6
+    parameter integer AFULL_LEVEL = -1 // -1 -> No AFULL
+
+)(
+    input wire [WIDTH-1:0] di,
+    input wire wren,
+    output wire full,
+    output wire afull,
+
+    output reg [WIDTH-1:0] do,
+    input wire rden,
+    output reg empty,
+
+    input wire clk,
+    input wire rst
+);
+
+    genvar i;
+
+    // Signals
+    wire [WIDTH-1:0] srl_q;
+    reg [LOG2_DEPTH-1:0] srl_addr;
+    wire srl_addr_ce;
+
+    wire srl_write;
+    wire srl_read;
+
+    wire srl_full;
+    wire srl_afull;
+    reg srl_empty;
+    wire srl_aempty;
+
+    // Instantiate the SRLs (one shift register per data bit)
+    generate
+        if (LOG2_DEPTH == 6) begin
+            // Two SRLC32E chained through Q31, with MUXF7 picking the half
+            // addressed by srl_addr[5]
+            wire [WIDTH-1:0] srl0_q31, srl0_q, srl1_q;
+
+            for (i=0; i<WIDTH; i=i+1)
+            begin : srl_64
+                SRLC32E srl_I0 (
+                    .Q(srl0_q[i]),
+                    .Q31(srl0_q31[i]),
+                    .A(srl_addr[4:0]),
+                    .CE(srl_write),
+                    .CLK(clk),
+                    .D(di[i])
+                );
+
+                SRLC32E srl_I1 (
+                    .Q(srl1_q[i]),
+                    .A(srl_addr[4:0]),
+                    .CE(srl_write),
+                    .CLK(clk),
+                    .D(srl0_q31[i])
+                );
+
+                MUXF7 mux_I (
+                    .O(srl_q[i]),
+                    .I0(srl0_q[i]),
+                    .I1(srl1_q[i]),
+                    .S(srl_addr[5])
+                );
+            end
+        end else if (LOG2_DEPTH == 5) begin
+            for (i=0; i<WIDTH; i=i+1)
+                SRLC32E srl_I (
+                    .Q(srl_q[i]),
+                    .A(srl_addr),
+                    .CE(srl_write),
+                    .CLK(clk),
+                    .D(di[i])
+                );
+        end else if (LOG2_DEPTH == 4) begin
+            for (i=0; i<WIDTH; i=i+1)
+                SRL16E srl_I (
+                    .Q(srl_q[i]),
+                    .A0(srl_addr[0]),
+                    .A1(srl_addr[1]),
+                    .A2(srl_addr[2]),
+                    .A3(srl_addr[3]),
+                    .CE(srl_write),
+                    .CLK(clk),
+                    .D(di[i])
+                );
+        end
+    endgenerate
+
+    // Address counter
+    // Moves only when exactly one of write/read happens. It resets to
+    // all-ones, so the first write wraps it to 0 (= one stored word).
+    assign srl_addr_ce = srl_write ^ srl_read;
+
+    always @(posedge clk)
+    begin
+        if (rst)
+            srl_addr <= {LOG2_DEPTH{1'b1}};
+        else if (srl_addr_ce) begin
+            if (srl_write)
+                srl_addr <= srl_addr + 1;
+            else
+                srl_addr <= srl_addr - 1;
+        end
+    end
+
+    // SRL status
+    // Full one step before all-ones, because all-ones is the reset value.
+    assign srl_full = srl_addr == {{(LOG2_DEPTH-1){1'b1}}, 1'b0};
+
+    generate
+        if (AFULL_LEVEL != -1) begin
+            // The all-ones address is excluded from the comparison
+            assign srl_afull = (srl_addr >= AFULL_LEVEL) && ~&(srl_addr);
+        end else begin
+            assign srl_afull = 1'b0;
+        end
+    endgenerate
+
+    // Address 0: exactly one word left in the SRL
+    assign srl_aempty = &(~srl_addr);
+
+    always @(posedge clk)
+    begin
+        if (rst)
+            srl_empty <= 1'b1;
+        else if (srl_addr_ce)
+            srl_empty <= srl_aempty & srl_read;
+    end
+
+    // Output register (to capture whatever comes out from SRL)
+    always @(posedge clk)
+    begin
+        if (srl_read)
+            do <= srl_q;
+    end
+
+    // Control and flag generation
+        // Write/Full is easy
+    assign srl_write = wren;
+    assign full = srl_full;
+    assign afull = srl_afull;
+
+        // Read/Empty is tricky
+        // 'empty' describes the output register, 'srl_empty' the SRL behind it
+    always @(posedge clk)
+    begin
+        if (rst)
+            empty <= 1'b1;
+        else if (rden | srl_read)
+            empty <= srl_empty;
+    end
+
+    // Pull a word from the SRL when the consumer reads or the output
+    // register is empty, provided the SRL has something to give.
+    assign srl_read = (rden | empty) & ~srl_empty;
+
+endmodule // fifo_srl
diff --git a/fpga/usrp3/lib/rfnoc/fosphor/rng.v b/fpga/usrp3/lib/rfnoc/fosphor/rng.v
new file mode 100644
index 000000000..6d6715fd0
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/fosphor/rng.v
@@ -0,0 +1,87 @@
+/*
+ * rng.v
+ *
+ * Very simple 32-bits PRNG using a few underlying LFSR.
+ *
+ * Copyright (C) 2014 Ettus Corporation LLC
+ * Copyright 2018 Ettus Research, a National Instruments Company
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * vim: ts=4 sw=4
+ */
+
+`ifdef SIM
+`default_nettype none
+`endif
+
+// ---------------------------------------------------------------------------
+// Main RNG
+// ---------------------------------------------------------------------------
+
+// Produces a registered 32-bit word each clock by slicing and XOR-ing the
+// states of four LFSRs (5, 8, 12 and 16 bits wide).
+module rng(
+    output reg [31:0] out,
+    input wire clk,
+    input wire rst
+);
+
+    // Current state of each generator
+    wire [ 4:0] q5;
+    wire [ 7:0] q8;
+    wire [11:0] q12;
+    wire [15:0] q16;
+
+    // Four LFSRs of different lengths
+    lfsr #(.WIDTH( 5), .POLY( 5'b01001)) lfsr5 (.out(q5), .clk(clk), .rst(rst));
+    lfsr #(.WIDTH( 8), .POLY( 8'h71 )) lfsr8 (.out(q8), .clk(clk), .rst(rst));
+    lfsr #(.WIDTH(12), .POLY(12'hc11 )) lfsr12 (.out(q12), .clk(clk), .rst(rst));
+    lfsr #(.WIDTH(16), .POLY(16'h6701 )) lfsr16 (.out(q16), .clk(clk), .rst(rst));
+
+    // 5-bit state with its bit order reversed
+    wire [4:0] q5_rev = { q5[0], q5[1], q5[2], q5[3], q5[4] };
+
+    // Mixed word, built field by field from the MSB down
+    wire [31:0] mixed;
+    assign mixed[31:27] = q16[15:11] ^ q5_rev;      // 5 bits
+    assign mixed[26:18] = q16[10:2];                // 9 bits
+    assign mixed[17:16] = q16[1:0] ^ q12[11:10];    // 2 bits
+    assign mixed[15:8]  = q12[9:2];                 // 8 bits
+    assign mixed[7:6]   = q12[1:0] ^ q8[7:6];       // 2 bits
+    assign mixed[5:0]   = q8[5:0];                  // 6 bits
+
+    // Output register (not reset; follows the LFSR states one clock later)
+    always @(posedge clk)
+        out <= mixed;
+
+endmodule // rng
+
+
+// ---------------------------------------------------------------------------
+// LFSR sub module
+// ---------------------------------------------------------------------------
+
+// Right-shifting LFSR: each clock the state moves one bit towards the LSB
+// and the XOR of the state bits selected by POLY enters at the MSB.
+module lfsr #(
+    parameter integer WIDTH = 8,
+    parameter POLY = 8'h71
+)(
+    output reg [WIDTH-1:0] out,
+    input wire clk,
+    input wire rst
+);
+
+    // State loaded by a synchronous reset: only bit 0 set
+    localparam [WIDTH-1:0] SEED = { {(WIDTH-1){1'b0}}, 1'b1 };
+
+    // Linear feedback bit
+    wire fb = ^(out & POLY);
+
+    // State register
+    always @(posedge clk) begin
+        if (rst) begin
+            out <= SEED;
+        end else begin
+            out <= { fb, out[WIDTH-1:1] };
+        end
+    end
+
+endmodule // lfsr
diff --git a/fpga/usrp3/lib/rfnoc/join_complex.v
b/fpga/usrp3/lib/rfnoc/join_complex.v
new file mode 100644
index 000000000..ac4ff48d1
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/join_complex.v
@@ -0,0 +1,27 @@
+
+// Copyright 2014, Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+// Packs a separate I stream (ii_*) and Q stream (iq_*) into one stream whose
+// data word is {I, Q}. Purely combinational: tvalid/tlast are taken from the
+// I side only and o_tready is fanned out to both inputs, so this ONLY works
+// when the two upstream paths are guaranteed to run in lock-step. 'error'
+// is high in any cycle where the two sides disagree on tvalid or tlast.
+
+module join_complex
+  #(parameter WIDTH=16)
+   (input [WIDTH-1:0] ii_tdata, input ii_tlast, input ii_tvalid, output ii_tready,
+    input [WIDTH-1:0] iq_tdata, input iq_tlast, input iq_tvalid, output iq_tready,
+    output [WIDTH*2-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready,
+    output error);
+
+   // Lock-step check between the two inputs
+   wire tlast_differs  = ii_tlast ^ iq_tlast;
+   wire tvalid_differs = ii_tvalid ^ iq_tvalid;
+   assign error = tlast_differs | tvalid_differs;
+
+   // Back-pressure goes to both inputs unchanged
+   assign iq_tready = o_tready;
+   assign ii_tready = o_tready;
+
+   // Framing follows the I side
+   assign o_tvalid = ii_tvalid;
+   assign o_tlast = ii_tlast;
+
+   // I in the upper half, Q in the lower half
+   assign o_tdata = {ii_tdata,iq_tdata};
+
+endmodule // join_complex
diff --git a/fpga/usrp3/lib/rfnoc/keep_one_in_n.v b/fpga/usrp3/lib/rfnoc/keep_one_in_n.v
new file mode 100644
index 000000000..d83dcd7cb
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/keep_one_in_n.v
@@ -0,0 +1,73 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Note: n == 0 lets everything through.
+// Warning: Sample / packet counts reset when n is changed, caution if changing during operation!
+
+// Decimating AXI-Stream filter: forwards one item out of every n.
+//   vector_mode = 0 : items are samples (every accepted word is counted)
+//   vector_mode = 1 : items are packets (counted on accepted i_tlast)
+// Dropped words are accepted from the input without being presented on the
+// output (i_tready is forced high for them).
+module keep_one_in_n #(
+  parameter KEEP_FIRST=0, // 0: Drop n-1 words then keep last word, 1: Keep 1st word then drop n-1
+  parameter WIDTH=16,
+  parameter MAX_N=65535
+)(
+  input clk, input reset,
+  input vector_mode,
+  input [$clog2(MAX_N+1)-1:0] n,
+  input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
+  output [WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
+);
+
+  reg [$clog2(MAX_N+1)-1:0] sample_cnt, pkt_cnt, n_reg;
+  reg n_changed;
+
+  // Register n and raise n_changed for one cycle whenever it differs from
+  // the registered copy
+  always @(posedge clk) begin
+    if (reset) begin
+      n_reg <= 1;
+      n_changed <= 1'b0;
+    end else begin
+      n_reg <= n;
+      if (n_reg != n) begin
+        n_changed <= 1'b1;
+      end else begin
+        n_changed <= 1'b0;
+      end
+    end
+  end
+
+  // Position flags; n_reg == 0 makes every item both first and last
+  wire on_last_sample = ( (sample_cnt >= n_reg) | (n_reg == 0) );
+  wire on_first_sample = ( (sample_cnt == 1) | (n_reg == 0) );
+  wire on_last_pkt = ( (pkt_cnt >= n_reg) | (n_reg == 0) );
+  wire on_first_pkt = ( (pkt_cnt == 1) | (n_reg == 0) );
+
+  // Counters run 1..n and restart on reset or when n changes
+  always @(posedge clk) begin
+    if (reset | n_changed) begin
+      sample_cnt <= 1;
+      pkt_cnt <= 1;
+    end else begin
+      if (i_tvalid & i_tready) begin
+        if (on_last_sample) begin
+          sample_cnt <= 1;
+        end else begin
+          sample_cnt <= sample_cnt + 1'd1;
+        end
+      end
+      if (i_tvalid & i_tready & i_tlast) begin
+        if (on_last_pkt) begin
+          pkt_cnt <= 1;
+        end else begin
+          pkt_cnt <= pkt_cnt + 1'd1;
+        end
+      end
+    end
+  end
+
+  assign i_tready = o_tready | (vector_mode ? (KEEP_FIRST ? ~on_first_pkt : ~on_last_pkt) :
+                                              (KEEP_FIRST ? ~on_first_sample : ~on_last_sample));
+  assign o_tvalid = i_tvalid & (vector_mode ? (KEEP_FIRST ? on_first_pkt : on_last_pkt) :
+                                              (KEEP_FIRST ? on_first_sample : on_last_sample));
+  assign o_tdata = i_tdata;
+  assign o_tlast = i_tlast & (vector_mode ? 1'b1 : on_last_pkt);
+
+endmodule // keep_one_in_n
diff --git a/fpga/usrp3/lib/rfnoc/moving_sum.v b/fpga/usrp3/lib/rfnoc/moving_sum.v
new file mode 100644
index 000000000..e3d8e2889
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/moving_sum.v
@@ -0,0 +1,80 @@
+//
+// Copyright 2016 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+
+// Running sum over the most recent 'len' signed input samples.
+//
+// Every accepted sample is added to the sum and pushed into a FIFO; once
+// 'len' samples have been accepted, each new sample also pops the oldest one
+// from the FIFO and subtracts it.
+//
+//   len   : window length. 0 is treated as 1. A change of the effective
+//           length clears the FIFO, the fill count and the sum, and stalls
+//           the input while len_changed is high.
+//   clear : synchronous clear of the sum, the fill count and the FIFOs
+//   o_*   : sum stream, WIDTH + $clog2(MAX_LEN+1) bits wide; o_tlast is the
+//           i_tlast of the sample that produced the sum
+module moving_sum #(
+  parameter MAX_LEN = 1023,
+  parameter WIDTH = 16
+)(
+  input clk, input reset, input clear,
+  input [$clog2(MAX_LEN+1)-1:0] len,
+  input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready,
+  output [WIDTH+$clog2(MAX_LEN+1)-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready
+);
+
+  wire signed [WIDTH+$clog2(MAX_LEN+1)-1:0] sum;
+  reg signed [WIDTH+$clog2(MAX_LEN+1)-1:0] sum_reg;
+  reg [$clog2(MAX_LEN+1)-1:0] full_count, len_reg;
+  reg len_changed;
+
+  // Effective window length. Change detection must look at this clamped
+  // value: comparing len_reg (never 0) against a raw len of 0 would keep
+  // len_changed asserted forever and stall the input permanently.
+  wire [$clog2(MAX_LEN+1)-1:0] len_eff = (len == 0) ? 1 : len;
+
+  wire full = (full_count == len_reg);   // window holds len_reg samples
+  wire do_op = (i_tvalid & i_tready);    // input sample accepted this cycle
+
+  wire i_tready_int, i_tvalid_int;
+  wire fifo_tvalid, fifo_tready;
+  wire [WIDTH-1:0] fifo_tdata;
+
+  // History of accepted samples; its head is the oldest one in the window
+  axi_fifo #(.WIDTH(WIDTH), .SIZE($clog2(MAX_LEN))) axi_fifo (
+    .clk(clk), .reset(reset | len_changed), .clear(clear),
+    .i_tdata(i_tdata), .i_tvalid(do_op), .i_tready(),
+    .o_tdata(fifo_tdata), .o_tvalid(fifo_tvalid), .o_tready(fifo_tready),
+    .occupied(), .space());
+
+  // Pop the oldest sample only once the window is full
+  assign fifo_tready = i_tvalid & i_tready_int & full;
+
+  // Number of samples currently in the window (saturates at len_reg)
+  always @(posedge clk) begin
+    if (reset | clear | len_changed) begin
+      full_count <= 'd0;
+    end else begin
+      if (do_op & ~full) begin
+        full_count <= full_count + 1;
+      end
+    end
+  end
+
+  assign sum = sum_reg + $signed(i_tdata) - (full ? $signed(fifo_tdata) : 0);
+
+  always @(posedge clk) begin
+    if (reset | clear) begin
+      sum_reg <= 'd0;
+      len_reg <= 1;
+      len_changed <= 1'b0;
+    end else begin
+      len_reg <= len_eff;
+      if (len_reg != len_eff) begin
+        len_changed <= 1'b1;
+      end else begin
+        len_changed <= 1'b0;
+      end
+      if (len_changed) begin
+        sum_reg <= 'd0;
+      end else if (do_op) begin
+        sum_reg <= sum;
+      end
+    end
+  end
+
+  // Output register
+  axi_fifo_flop #(.WIDTH(WIDTH+$clog2(MAX_LEN+1)+1)) axi_fifo_flop (
+    .clk(clk), .reset(reset), .clear(clear),
+    .i_tdata({i_tlast,sum}), .i_tvalid(i_tvalid_int), .i_tready(i_tready_int),
+    .o_tdata({o_tlast,o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
+    .occupied(), .space());
+
+  // A full window additionally needs the oldest sample to be available
+  assign i_tready = (~full | (fifo_tvalid & full)) & i_tready_int & ~len_changed;
+  assign i_tvalid_int = (~full | (fifo_tvalid & full)) & i_tvalid & ~len_changed;
+
+endmodule // moving_sum
diff --git a/fpga/usrp3/lib/rfnoc/mult.v b/fpga/usrp3/lib/rfnoc/mult.v
new file mode 100644
index 000000000..c32025236
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/mult.v
@@ -0,0 +1,115 @@
+//
+// Copyright 2014 Ettus Research LLC
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Write xilinx DSP48E1 primitive for multiplication with AXI interfaces
+// Latency must be 2 to 4
+
+// FIXME handle tlast
+// FIXME handle CASCADE_OUT
+
+module mult
+  #(parameter WIDTH_A=25,
+    parameter WIDTH_B=18,
+    parameter WIDTH_P=48, // must be 48 if you are cascading
+    parameter DROP_TOP_P=0, // must be 0 if you are cascading
+    parameter LATENCY=3,
+    parameter CASCADE_OUT=0)
+   (input clk, input reset,
+    input [WIDTH_A-1:0] a_tdata, input a_tlast, input a_tvalid, output a_tready,
+    input [WIDTH_B-1:0] b_tdata, input b_tlast, input b_tvalid, output b_tready,
+    output [WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready);
+
+   wire [24:0] A_IN = { a_tdata, {(25-(WIDTH_A)){1'b0}}};
+   wire [17:0] B_IN = { b_tdata, {(18-(WIDTH_B)){1'b0}}};
+   wire [47:0] P1_OUT, P1_OUT_CASC;
+   wire [47:0] p_tdata_int = CASCADE_OUT ?
P1_OUT_CASC : P1_OUT;
+   assign p_tdata = p_tdata_int[47-DROP_TOP_P:48-WIDTH_P-DROP_TOP_P];
+
+   // DSP48E1 pipeline registers in use for the requested LATENCY
+   localparam MREG_IN = 1; // Always have this reg
+   localparam PREG_IN = (LATENCY >= 3) ? 1 : 0;
+   localparam A2REG_IN = (LATENCY >= 2) ? 1 : 0;
+   localparam A1REG_IN = (LATENCY == 4) ? 1 : 0;
+   localparam AREG_IN = A1REG_IN + A2REG_IN;
+
+   // Per-stage enables produced by axi_pipe_join
+   wire [A1REG_IN:0] en0, en1;
+   wire [PREG_IN:0] en_post;
+   wire CE = 1'b0; // FIXME
+
+   // Clock enables for the DSP48E1 registers.
+   //
+   // Derived from the register-count localparams instead of a
+   // 'case (LATENCY)' inside 'always @*': that form used non-blocking
+   // assignments in combinational logic and had no default arm, so the
+   // enables were latched/undriven for any LATENCY outside 2..4. For
+   // LATENCY = 2, 3 and 4 the values below are identical to the old table:
+   //   LATENCY  CEP         CEM         CEA2/CEB2   CEA1/CEB1
+   //      2     0           en_post[0]  en*[0]      0
+   //      3     en_post[1]  en_post[0]  en*[0]      0
+   //      4     en_post[1]  en_post[0]  en*[1]      en*[0]
+   wire CEP  = (PREG_IN == 1)  ? en_post[PREG_IN] : 1'b0;
+   wire CEM  = en_post[0];
+   wire CEA2 = en0[A1REG_IN];
+   wire CEA1 = (A1REG_IN == 1) ? en0[0] : 1'b0;
+   wire CEB2 = en1[A1REG_IN];
+   wire CEB1 = (A1REG_IN == 1) ? en1[0] : 1'b0;
+
+   // Handshake pipeline matching the DSP register stages
+   axi_pipe_join #(.PRE_JOIN_STAGES0(AREG_IN), .PRE_JOIN_STAGES1(AREG_IN),
+                   .POST_JOIN_STAGES(MREG_IN+PREG_IN)) axi_pipe_join
+     (.clk(clk), .reset(reset), .clear(1'b0),
+      .i0_tlast(a_tlast), .i0_tvalid(a_tvalid), .i0_tready(a_tready),
+      .i1_tlast(b_tlast), .i1_tvalid(b_tvalid), .i1_tready(b_tready),
+      .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready),
+      .enables0(en0), .enables1(en1), .enables_post(en_post));
+
+   DSP48E1 #(.ACASCREG(AREG_IN),
+             .AREG(AREG_IN),
+             .ADREG(0),
+             .DREG(0),
+             .BCASCREG(AREG_IN),
+             .BREG(AREG_IN),
+             .MREG(MREG_IN),
+             .PREG(PREG_IN))
+   DSP48_inst (.ACOUT(),
+               .BCOUT(),
+               .CARRYCASCOUT(),
+               .CARRYOUT(),
+               .MULTSIGNOUT(),
+               .OVERFLOW(),
+               .P(P1_OUT),
+               .PATTERNBDETECT(),
+               .PATTERNDETECT(),
+               .PCOUT(P1_OUT_CASC),
+               .UNDERFLOW(),
+               .A({5'b0,A_IN}),
+               .ACIN(30'b0),
+               .ALUMODE(4'b0000),
+               .B(B_IN),
+               .BCIN(18'b0),
+               .C(48'b0),
+               .CARRYCASCIN(1'b0),
+               .CARRYIN(1'b0),
+               .CARRYINSEL(3'b0),
+               .CEA1(CEA1),
+               .CEA2(CEA2),
+               .CEAD(1'b0),
+               .CEALUMODE(1'b1),
+               .CEB1(CEB1),
+               .CEB2(CEB2),
+               .CEC(CE), //
+               .CECARRYIN(CE),
+               .CECTRL(1'b1),
+               .CED(CE),
+               .CEINMODE(CE),
+               .CEM(CEM),
+               .CEP(CEP),
+               .CLK(clk),
+               .D(25'b0),
+               .INMODE(5'b0),
+               .MULTSIGNIN(1'b0),
+               .OPMODE(7'b0000101),
+               .PCIN(48'b0),
+               .RSTA(reset),
+               .RSTALLCARRYIN(reset),
+               .RSTALUMODE(reset),
+               .RSTB(reset),
+               .RSTC(reset),
+               .RSTD(reset),
+               .RSTCTRL(reset),
+               .RSTINMODE(reset),
+               .RSTM(reset),
+               .RSTP(reset));
+
+endmodule // mult
diff --git a/fpga/usrp3/lib/rfnoc/mult_add.v b/fpga/usrp3/lib/rfnoc/mult_add.v
new file mode 100644
index 000000000..e853d7d74
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/mult_add.v
@@ -0,0 +1,124 @@
+
+// Copyright 2014 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Write xilinx DSP48E1 primitive for mult-add with AXI interfaces
+
+module mult_add
+  #(parameter WIDTH_A=25,
+    parameter WIDTH_B=18,
+    parameter WIDTH_P=48, // Must be 48 if you are cascading
+    parameter DROP_TOP_P=0, // Must be 0 if you are cascading
+    parameter LATENCY=3,
+    parameter CASCADE_IN=0,
+    parameter CASCADE_OUT=0)
+   (input clk, input reset,
+    input [WIDTH_A-1:0] a_tdata, input a_tlast, input a_tvalid, output a_tready,
+    input [WIDTH_B-1:0] b_tdata, input b_tlast, input b_tvalid, output b_tready,
+    input [WIDTH_P-1:0] c_tdata, input c_tlast, input c_tvalid, output c_tready,
+    output [WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready);
+
+   wire [24:0] A_IN = { a_tdata, {(25-(WIDTH_A)){1'b0}}};
+   wire [17:0] B_IN = { b_tdata, {(18-(WIDTH_B)){1'b0}}};
+   wire [47:0] P1_OUT, P1_OUT_CASC;
+   wire [47:0] p_tdata_int = CASCADE_OUT ? P1_OUT_CASC : P1_OUT;
+   assign p_tdata = p_tdata_int[47-DROP_TOP_P:48-WIDTH_P-DROP_TOP_P];
+
+   wire [47:0] c_tdata_int = { {DROP_TOP_P{c_tdata[WIDTH_P-1]}}, c_tdata, {(48-WIDTH_P-DROP_TOP_P){1'b0}} };
+
+   wire [47:0] CIN = CASCADE_IN ? 48'h0000_0000_0000 : c_tdata_int;
+   wire [47:0] PCIN = CASCADE_IN ? c_tdata_int : 48'h0000_0000_0000;
+
+   localparam MREG_IN = 1; // Always have this reg
+   localparam PREG_IN = (LATENCY >= 3) ? 1 : 0;
+   localparam A2REG_IN = (LATENCY >= 2) ? 1 : 0;
+   localparam A1REG_IN = (LATENCY == 4) ?
1 : 0;
+   localparam AREG_IN = A1REG_IN + A2REG_IN;
+   // See OPMODE Control Bits Settings, Table 2-7,2-8,2-9
+   localparam ZMUX_PCIN = 3'b001;
+   localparam ZMUX_C = 3'b011;
+   localparam XMUX_M = 2'b01;
+   localparam YMUX_M = 2'b01;
+
+   // Per-stage enables produced by axi_pipe_mac
+   wire [A1REG_IN:0] enables_a, enables_b;
+   wire CE = 1'b1; // FIXME
+   wire CEC, CEM, CEP;
+
+   // Clock enables for the A/B input registers of the DSP48E1.
+   //
+   // Derived from A1REG_IN instead of a 'case (LATENCY)' inside
+   // 'always @*': that form used non-blocking assignments in combinational
+   // logic and only listed LATENCY 3 and 4, so for LATENCY == 2 (for which
+   // the localparams above still configure one A/B register) the enables
+   // were latched/undriven. For LATENCY 3 and 4 the values are unchanged:
+   //   LATENCY  CEA2/CEB2      CEA1/CEB1
+   //      3     enables_*[0]   0
+   //      4     enables_*[1]   enables_*[0]
+   // NOTE(review): whether axi_pipe_mac itself supports LATENCY values other
+   // than 3 and 4 cannot be seen from this file -- confirm before relying
+   // on them.
+   wire CEA2 = enables_a[A1REG_IN];
+   wire CEA1 = (A1REG_IN == 1) ? enables_a[0] : 1'b0;
+   wire CEB2 = enables_b[A1REG_IN];
+   wire CEB1 = (A1REG_IN == 1) ? enables_b[0] : 1'b0;
+
+   // Handshake pipeline; also supplies the C, M and P register enables
+   axi_pipe_mac #(.LATENCY(LATENCY), .CASCADE_IN(CASCADE_IN)) axi_pipe_mac
+     (.clk(clk), .reset(reset), .clear(1'b0),
+      .a_tlast(a_tlast), .a_tvalid(a_tvalid), .a_tready(a_tready),
+      .b_tlast(b_tlast), .b_tvalid(b_tvalid), .b_tready(b_tready),
+      .c_tlast(c_tlast), .c_tvalid(c_tvalid), .c_tready(c_tready),
+      .p_tlast(p_tlast), .p_tvalid(p_tvalid), .p_tready(p_tready),
+      .enables_a(enables_a), .enables_b(enables_b), .enable_c(CEC), .enable_m(CEM), .enable_p(CEP));
+
+   DSP48E1 #(.ACASCREG(AREG_IN),
+             .AREG(AREG_IN),
+             .ADREG(0),
+             .DREG(0),
+             .BCASCREG(AREG_IN),
+             .BREG(AREG_IN),
+             .MREG(MREG_IN),
+             .PREG(PREG_IN))
+   DSP48_inst (.ACOUT(), // Outputs start here
+               .BCOUT(),
+               .CARRYCASCOUT(),
+               .CARRYOUT(),
+               .MULTSIGNOUT(),
+               .OVERFLOW(),
+               .P(P1_OUT),
+               .PATTERNBDETECT(),
+               .PATTERNDETECT(),
+               .PCOUT(P1_OUT_CASC),
+               .UNDERFLOW(),
+               .A({5'b0,A_IN}), // Inputs start here
+               .ACIN(30'b0),
+               .ALUMODE(4'b0000), //////////////////////
+               .B(B_IN),
+               .BCIN(18'b0),
+               .C(CIN), ///////////////////////
+               .CARRYCASCIN(1'b0),
+               .CARRYIN(1'b0),
+               .CARRYINSEL(3'b0),
+               .CEA1(CEA1),
+               .CEA2(CEA2),
+               .CEAD(1'b0),
+               .CEALUMODE(1'b1), ////////////////////////
+               .CEB1(CEB1),
+               .CEB2(CEB2),
+               .CEC(CEC), ///////////////////////////
+               .CECARRYIN(CE),
+               .CECTRL(CE),
+               .CED(CE),
+               .CEINMODE(CE),
+               .CEM(CEM),
+               .CEP(CEP),
+               .CLK(clk),
+               .D(25'b0),
+               .INMODE(5'b0), ///////////////////////
+               .MULTSIGNIN(1'b0),
+               .OPMODE({(CASCADE_IN ? ZMUX_PCIN : ZMUX_C), YMUX_M, XMUX_M}), // ////////////////////
+               .PCIN(PCIN), //////////////////////
+               .RSTA(reset),
+               .RSTALLCARRYIN(reset),
+               .RSTALUMODE(reset),
+               .RSTB(reset),
+               .RSTC(reset),
+               .RSTD(reset),
+               .RSTCTRL(reset),
+               .RSTINMODE(reset),
+               .RSTM(reset),
+               .RSTP(reset));
+
+endmodule // mult_add
diff --git a/fpga/usrp3/lib/rfnoc/mult_add_rc.v b/fpga/usrp3/lib/rfnoc/mult_add_rc.v
new file mode 100644
index 000000000..91d548ee0
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/mult_add_rc.v
@@ -0,0 +1,64 @@
+
+// Copyright 2014 Ettus Research
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Complex times real. Complex number is on port B (18I, 18Q), real is on A (25 bits)
+
+module mult_add_rc
+  #(parameter WIDTH_REAL=25,
+    parameter WIDTH_CPLX=18,
+    parameter WIDTH_P=48,
+    parameter DROP_TOP_P=0,
+    parameter LATENCY=3,
+    parameter CASCADE_IN=0,
+    parameter CASCADE_OUT=0)
+   (input clk, input reset,
+    input [WIDTH_REAL-1:0] real_tdata, input real_tlast, input real_tvalid, output real_tready,
+    input [2*WIDTH_CPLX-1:0] cplx_tdata, input cplx_tlast, input cplx_tvalid, output cplx_tready,
+    input [2*WIDTH_P-1:0] c_tdata, input c_tlast, input c_tvalid, output c_tready,
+    output [2*WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready);
+
+   // NOTE -- we cheat here and share ready/valid.
This works because we can guarantee both + // paths will match + + generate + if(WIDTH_REAL > WIDTH_CPLX) + begin + mult_add #(.WIDTH_A(WIDTH_REAL), .WIDTH_B(WIDTH_CPLX), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_IN(CASCADE_IN), .CASCADE_OUT(CASCADE_OUT)) mult_add_i + (.clk(clk), .reset(reset), + .a_tdata(real_tdata), .a_tlast(real_tlast), .a_tvalid(real_tvalid), .a_tready(real_tready), + .b_tdata(cplx_tdata[2*WIDTH_CPLX-1:WIDTH_CPLX]), .b_tlast(cplx_tlast), .b_tvalid(cplx_tvalid), .b_tready(cplx_tready), + .c_tdata(c_tdata[2*WIDTH_P-1:WIDTH_P]), .c_tlast(c_tlast), .c_tvalid(c_tvalid), .c_tready(c_tready), + .p_tdata(p_tdata[2*WIDTH_P-1:WIDTH_P]), .p_tlast(p_tlast), .p_tvalid(p_tvalid), .p_tready(p_tready)); + + mult_add #(.WIDTH_A(WIDTH_REAL), .WIDTH_B(WIDTH_CPLX), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_IN(CASCADE_IN), .CASCADE_OUT(CASCADE_OUT)) mult_add_q + (.clk(clk), .reset(reset), + .a_tdata(real_tdata), .a_tlast(real_tlast), .a_tvalid(real_tvalid), .a_tready(), + .b_tdata(cplx_tdata[WIDTH_CPLX-1:0]), .b_tlast(cplx_tlast), .b_tvalid(cplx_tvalid), .b_tready(), + .c_tdata(c_tdata[WIDTH_P-1:0]), .c_tlast(c_tlast), .c_tvalid(c_tvalid), .c_tready(), + .p_tdata(p_tdata[WIDTH_P-1:0]), .p_tlast(), .p_tvalid(), .p_tready(p_tready)); + end // if (WIDTH_REAL > WIDTH_CPLX) + else + begin + mult_add #(.WIDTH_A(WIDTH_CPLX), .WIDTH_B(WIDTH_REAL), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_IN(CASCADE_IN), .CASCADE_OUT(CASCADE_OUT)) mult_add_i + (.clk(clk), .reset(reset), + .a_tdata(cplx_tdata[2*WIDTH_CPLX-1:WIDTH_CPLX]), .a_tlast(cplx_tlast), .a_tvalid(cplx_tvalid), .a_tready(cplx_tready), + .b_tdata(real_tdata), .b_tlast(real_tlast), .b_tvalid(real_tvalid), .b_tready(real_tready), + .c_tdata(c_tdata[2*WIDTH_P-1:WIDTH_P]), .c_tlast(c_tlast), .c_tvalid(c_tvalid), .c_tready(c_tready), + .p_tdata(p_tdata[2*WIDTH_P-1:WIDTH_P]), .p_tlast(p_tlast), .p_tvalid(p_tvalid), 
.p_tready(p_tready)); + + mult_add #(.WIDTH_A(WIDTH_CPLX), .WIDTH_B(WIDTH_REAL), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_IN(CASCADE_IN), .CASCADE_OUT(CASCADE_OUT)) mult_add_q + (.clk(clk), .reset(reset), + .a_tdata(cplx_tdata[WIDTH_CPLX-1:0]), .a_tlast(cplx_tlast), .a_tvalid(cplx_tvalid), .a_tready(), + .b_tdata(real_tdata), .b_tlast(real_tlast), .b_tvalid(real_tvalid), .b_tready(), + .c_tdata(c_tdata[WIDTH_P-1:0]), .c_tlast(c_tlast), .c_tvalid(c_tvalid), .c_tready(), + .p_tdata(p_tdata[WIDTH_P-1:0]), .p_tlast(), .p_tvalid(), .p_tready(p_tready)); + end // else: !if(WIDTH_REAL > WIDTH_CPLX) + endgenerate + +endmodule // mult diff --git a/fpga/usrp3/lib/rfnoc/mult_rc.v b/fpga/usrp3/lib/rfnoc/mult_rc.v new file mode 100644 index 000000000..f8643f33e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/mult_rc.v @@ -0,0 +1,57 @@ + +// Copyright 2014 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// Complex times real. One width must be less than 26 and the other less than 19. + +module mult_rc + #(parameter WIDTH_REAL=25, + parameter WIDTH_CPLX=18, + parameter WIDTH_P=48, + parameter DROP_TOP_P=0, + parameter LATENCY=3, + parameter CASCADE_OUT=0) + (input clk, input reset, + input [WIDTH_REAL-1:0] real_tdata, input real_tlast, input real_tvalid, output real_tready, + input [2*WIDTH_CPLX-1:0] cplx_tdata, input cplx_tlast, input cplx_tvalid, output cplx_tready, + output [2*WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready); + + // NOTE -- we cheat here and share ready/valid. 
This works because we can guarantee both + // paths will match + generate + if(WIDTH_REAL > WIDTH_CPLX) + begin + mult #(.WIDTH_A(WIDTH_REAL), .WIDTH_B(WIDTH_CPLX), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_OUT(CASCADE_OUT)) mult_i + (.clk(clk), .reset(reset), + .a_tdata(real_tdata), .a_tlast(real_tlast), .a_tvalid(real_tvalid), .a_tready(real_tready), + .b_tdata(cplx_tdata[2*WIDTH_CPLX-1:WIDTH_CPLX]), .b_tlast(cplx_tlast), .b_tvalid(cplx_tvalid), .b_tready(cplx_tready), + .p_tdata(p_tdata[2*WIDTH_P-1:WIDTH_P]), .p_tlast(p_tlast), .p_tvalid(p_tvalid), .p_tready(p_tready)); + + mult #(.WIDTH_A(WIDTH_REAL), .WIDTH_B(WIDTH_CPLX), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_OUT(CASCADE_OUT)) mult_q + (.clk(clk), .reset(reset), + .a_tdata(real_tdata), .a_tlast(real_tlast), .a_tvalid(real_tvalid), .a_tready(), + .b_tdata(cplx_tdata[WIDTH_CPLX-1:0]), .b_tlast(cplx_tlast), .b_tvalid(cplx_tvalid), .b_tready(), + .p_tdata(p_tdata[WIDTH_P-1:0]), .p_tlast(), .p_tvalid(), .p_tready(p_tready)); + end // if (WIDTH_REAL > WIDTH_CPLX) + else + begin + mult #(.WIDTH_A(WIDTH_CPLX), .WIDTH_B(WIDTH_REAL), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_OUT(CASCADE_OUT)) mult_i + (.clk(clk), .reset(reset), + .a_tdata(cplx_tdata[2*WIDTH_CPLX-1:WIDTH_CPLX]), .a_tlast(cplx_tlast), .a_tvalid(cplx_tvalid), .a_tready(cplx_tready), + .b_tdata(real_tdata), .b_tlast(real_tlast), .b_tvalid(real_tvalid), .b_tready(real_tready), + .p_tdata(p_tdata[2*WIDTH_P-1:WIDTH_P]), .p_tlast(p_tlast), .p_tvalid(p_tvalid), .p_tready(p_tready)); + + mult #(.WIDTH_A(WIDTH_CPLX), .WIDTH_B(WIDTH_REAL), .WIDTH_P(WIDTH_P), .DROP_TOP_P(DROP_TOP_P), + .LATENCY(LATENCY), .CASCADE_OUT(CASCADE_OUT)) mult_q + (.clk(clk), .reset(reset), + .a_tdata(cplx_tdata[WIDTH_CPLX-1:0]), .a_tlast(cplx_tlast), .a_tvalid(cplx_tvalid), .a_tready(), + .b_tdata(real_tdata), .b_tlast(real_tlast), .b_tvalid(real_tvalid), .b_tready(), + 
.p_tdata(p_tdata[WIDTH_P-1:0]), .p_tlast(), .p_tvalid(), .p_tready(p_tready)); + end // else: !if(WIDTH_REAL > WIDTH_CPLX) + endgenerate + +endmodule // mult_rc diff --git a/fpga/usrp3/lib/rfnoc/multiply.v b/fpga/usrp3/lib/rfnoc/multiply.v new file mode 100644 index 000000000..ad0353c66 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/multiply.v @@ -0,0 +1,138 @@ +// +// Copyright 2015 Ettus Research +// +// AXI Stream multiplier. Relies on synthesis engine for proper DSP inference. + +module multiply #( + parameter WIDTH_A = 16, + parameter WIDTH_B = 16, + parameter WIDTH_P = 32, + parameter DROP_TOP_P = 1, // Default drops extra bit (16-bit signed x 16-bit signed => 31-bits signed) + parameter LATENCY = 3, // multiplier pipeline latency, 0 - 4 + parameter EN_SATURATE = 0, // Enable saturating output to avoid overflow (adds +1 to latency) + parameter EN_ROUND = 0, // Enable rounding dropped LSBs (adds +1 to latency, total of +2 if used with EN_SATURATE) + parameter SIGNED = 1) // Signed multiply +( + input clk, input reset, + input [WIDTH_A-1:0] a_tdata, input a_tlast, input a_tvalid, output a_tready, + input [WIDTH_B-1:0] b_tdata, input b_tlast, input b_tvalid, output b_tready, + output [WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready +); + + localparam A_LATENCY = (LATENCY == 1) ? 1 : + (LATENCY == 2) ? 1 : + (LATENCY == 3) ? 2 : + (LATENCY == 4) ? 2 : 2; + localparam B_LATENCY = A_LATENCY; + localparam P_LATENCY = (LATENCY == 2) ? 1 : + (LATENCY == 3) ? 1 : + (LATENCY == 4) ? 
2 : 2; + + reg [WIDTH_A-1:0] a_reg[A_LATENCY-1:0]; + reg [WIDTH_B-1:0] b_reg[B_LATENCY-1:0]; + reg [WIDTH_A+WIDTH_B-1:0] p_reg[P_LATENCY-1:0]; + + wire [A_LATENCY-1:0] en_a_reg; + wire [B_LATENCY-1:0] en_b_reg; + wire [P_LATENCY-1:0] en_p_reg; + wire p_int_tlast, p_int_tvalid, p_int_tready; + axi_pipe_join #( + .PRE_JOIN_STAGES0(A_LATENCY), + .PRE_JOIN_STAGES1(B_LATENCY), + .POST_JOIN_STAGES(P_LATENCY)) + axi_pipe_join ( + .clk(clk), .reset(reset), .clear(1'b0), + .i0_tlast(a_tlast), .i0_tvalid(a_tvalid), .i0_tready(a_tready), + .i1_tlast(b_tlast), .i1_tvalid(b_tvalid), .i1_tready(b_tready), + .o_tlast(p_int_tlast), .o_tvalid(p_int_tvalid), .o_tready(p_int_tready), + .enables0(en_a_reg), .enables1(en_b_reg), .enables_post(en_p_reg)); + + // Multiply + wire [WIDTH_A+WIDTH_B-1:0] p_mult_signed = (LATENCY == 0) ? $signed(a_tdata) * $signed(b_tdata) : $signed(a_reg[A_LATENCY-1]) * $signed(b_reg[B_LATENCY-1]); + wire [WIDTH_A+WIDTH_B-1:0] p_mult_unsigned = (LATENCY == 0) ? a_tdata * b_tdata : a_reg[A_LATENCY-1] * b_reg[B_LATENCY-1]; + wire [WIDTH_A+WIDTH_B-1:0] p_int_tdata = (LATENCY == 0) ? (SIGNED ? p_mult_signed : p_mult_unsigned) : p_reg[P_LATENCY-1]; + + // Register pipeline + integer i; + always @(posedge clk) begin + if (reset) begin + for (i = 0; i < A_LATENCY; i = i + 1) begin + a_reg[i] <= 'd0; + end + for (i = 0; i < B_LATENCY; i = i + 1) begin + b_reg[i] <= 'd0; + end + for (i = 0; i < P_LATENCY; i = i + 1) begin + p_reg[i] <= 'd0; + end + end else begin + for (i = 0; i < A_LATENCY; i = i + 1) begin + if (en_a_reg[i]) begin + if (i == 0) begin + a_reg[i] <= $signed(a_tdata); + end else begin + a_reg[i] <= a_reg[i-1]; + end + end + end + for (i = 0; i < B_LATENCY; i = i + 1) begin + if (en_b_reg[i]) begin + if (i == 0) begin + b_reg[i] <= $signed(b_tdata); + end else begin + b_reg[i] <= b_reg[i-1]; + end + end + end + for (i = 0; i < P_LATENCY; i = i + 1) begin + if (en_p_reg[i]) begin + if (i == 0) begin + p_reg[i] <= SIGNED ? 
p_mult_signed : p_mult_unsigned; + end else begin + p_reg[i] <= p_reg[i-1]; + end + end + end + end + end + + // Saturate & Round + // TODO: Might be able to replace axi_round with DSP's built in rounding + generate + if ((EN_SATURATE == 1) && (EN_ROUND == 1)) begin + axi_round_and_clip #( + .WIDTH_IN(WIDTH_A+WIDTH_B), + .WIDTH_OUT(WIDTH_P), + .CLIP_BITS(DROP_TOP_P)) + axi_round_and_clip ( + .clk(clk), .reset(reset), + .i_tdata(p_int_tdata), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready), + .o_tdata(p_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready)); + end else if ((EN_SATURATE == 0) && (EN_ROUND == 1)) begin + axi_round #( + .WIDTH_IN(WIDTH_A+WIDTH_B-DROP_TOP_P), + .WIDTH_OUT(WIDTH_P)) + axi_round ( + .clk(clk), .reset(reset), + .i_tdata(p_int_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:0]), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready), + .o_tdata(p_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready)); + end else if ((EN_SATURATE == 1) && (EN_ROUND == 0)) begin + wire [WIDTH_A+WIDTH_B-DROP_TOP_P-1:0] p_clip_tdata; + axi_clip #( + .WIDTH_IN(WIDTH_A+WIDTH_B), + .WIDTH_OUT(WIDTH_A+WIDTH_B-DROP_TOP_P), + .CLIP_BITS(DROP_TOP_P)) + axi_clip ( + .clk(clk), .reset(reset), + .i_tdata(p_int_tdata), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready), + .o_tdata(p_clip_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready)); + assign p_tdata = p_clip_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:WIDTH_A+WIDTH_B-DROP_TOP_P-WIDTH_P]; + end else begin + assign p_tdata = p_int_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:WIDTH_A+WIDTH_B-DROP_TOP_P-WIDTH_P]; + assign p_tlast = p_int_tlast; + assign p_tvalid = p_int_tvalid; + assign p_int_tready = p_tready; + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/noc_shell_regs.vh b/fpga/usrp3/lib/rfnoc/noc_shell_regs.vh new file mode 100644 index 000000000..a81bd4119 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/noc_shell_regs.vh @@ 
-0,0 +1,25 @@ + // Registers 0 - 127 for NoC Shell + localparam [7:0] SR_FLOW_CTRL_BYTES_PER_ACK = 1; + localparam [7:0] SR_FLOW_CTRL_WINDOW_SIZE = 2; + localparam [7:0] SR_FLOW_CTRL_EN = 3; + localparam [7:0] SR_ERROR_POLICY = 4; + localparam [7:0] SR_SRC_SID = 5; + localparam [7:0] SR_NEXT_DST_SID = 6; + localparam [7:0] SR_RESP_IN_DST_SID = 7; + localparam [7:0] SR_RESP_OUT_DST_SID = 8; + localparam [7:0] SR_FLOW_CTRL_PKT_LIMIT = 9; + localparam [7:0] SR_RB_ADDR_USER = 124; + localparam [7:0] SR_CLEAR_RX_FC = 125; + localparam [7:0] SR_CLEAR_TX_FC = 126; + localparam [7:0] SR_RB_ADDR = 127; + // Registers 128-255 for users + localparam [7:0] SR_USER_REG_BASE = 128; + + // NoC Shell readback registers + localparam [7:0] RB_NOC_ID = 0; + localparam [7:0] RB_GLOBAL_PARAMS = 1; + localparam [7:0] RB_FIFOSIZE = 2; + localparam [7:0] RB_MTU = 3; + localparam [7:0] RB_BLOCK_PORT_SIDS = 4; + localparam [7:0] RB_USER_RB_DATA = 5; + localparam [7:0] RB_NOC_SHELL_COMPAT_NUM = 6; diff --git a/fpga/usrp3/lib/rfnoc/noc_traffic_counter.v b/fpga/usrp3/lib/rfnoc/noc_traffic_counter.v new file mode 100644 index 000000000..5ac2ff44e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/noc_traffic_counter.v @@ -0,0 +1,128 @@ +// +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// Monitors four AXI-Stream links with axis_strm_monitor (transfer and packet counts) and counts bus_clk ticks while the enable register is set; values are returned on rb_data. + +module noc_traffic_counter #( + parameter SR_REG_BASE = 128, // settings address of the counter-enable register + parameter RB_REG_BASE = 0) // readback address of the signature word; the counters follow at offsets 1 to 9 +( + input bus_clk, input bus_rst, + input ce_clk, input ce_rst, + + // Control sink + input [31:0] set_data, input [7:0] set_addr, input set_stb, + output rb_stb, input [7:0] rb_addr, output [63:0] rb_data, + + // Traffic signals to count + input i_tlast, input i_tvalid, input i_tready, + input o_tlast, input o_tvalid, input o_tready, + input str_sink_tlast, input str_sink_tvalid, input str_sink_tready, + input str_src_tlast, input str_src_tvalid, input str_src_tready +); + wire en, counter_enable_changed, counter_rst; // counter_rst is declared explicitly instead of relying on an implicit net + + wire [31:0] set_data_bclk; + wire [7:0] 
set_addr_bclk; + wire set_stb_bclk; + + reg [63:0] rb_data_bclk; + wire [7:0] rb_addr_bclk; + reg rb_stb_bclk; + + reg [63:0] tick_cnt_noc_shell; // bus_clk ticks counted while en is set + + wire [63:0] xbar_to_shell_xfer_cnt; + wire [63:0] xbar_to_shell_pkt_cnt; + + wire [63:0] shell_to_xbar_xfer_cnt; + wire [63:0] shell_to_xbar_pkt_cnt; + + wire [63:0] shell_to_ce_xfer_cnt; + wire [63:0] shell_to_ce_pkt_cnt; + + wire [63:0] ce_to_shell_xfer_cnt; + wire [63:0] ce_to_shell_pkt_cnt; + + localparam SR_COUNTER_ENABLE = SR_REG_BASE + 0; + + localparam RB_SIGNATURE = RB_REG_BASE + 0; + localparam RB_BUS_CLK_TICKS = RB_REG_BASE + 1; + localparam RB_XBAR_TO_SHELL_XFER_CNT = RB_REG_BASE + 2; + localparam RB_XBAR_TO_SHELL_PKT_CNT = RB_REG_BASE + 3; + localparam RB_SHELL_TO_XBAR_XFER_CNT = RB_REG_BASE + 4; + localparam RB_SHELL_TO_XBAR_PKT_CNT = RB_REG_BASE + 5; + localparam RB_SHELL_TO_CE_XFER_CNT = RB_REG_BASE + 6; + localparam RB_SHELL_TO_CE_PKT_CNT = RB_REG_BASE + 7; + localparam RB_CE_TO_SHELL_XFER_CNT = RB_REG_BASE + 8; + localparam RB_CE_TO_SHELL_PKT_CNT = RB_REG_BASE + 9; + + // Registers are implemented on bus clock + axi_fifo_2clk #(.WIDTH(8+8+32), .SIZE(2)) reg_write_to_bclk ( + .reset(ce_rst), .i_aclk(ce_clk), + .i_tdata({set_addr, rb_addr, set_data}), .i_tvalid(set_stb), .i_tready(), + .o_aclk(bus_clk), + .o_tdata({set_addr_bclk, rb_addr_bclk, set_data_bclk}), .o_tvalid(set_stb_bclk), .o_tready(1'b1)); // each settings write, together with the current rb_addr, is carried from ce_clk to bus_clk + + axi_fifo_2clk #(.WIDTH(64), .SIZE(2)) reg_rb_from_bclk ( + .reset(bus_rst), .i_aclk(bus_clk), + .i_tdata(rb_data_bclk), .i_tvalid(rb_stb_bclk), .i_tready(), + .o_aclk(ce_clk), + .o_tdata(rb_data), .o_tvalid(rb_stb), .o_tready(1'b1)); // readback word is carried from bus_clk back to ce_clk + + setting_reg #(.my_addr(SR_COUNTER_ENABLE), .width(1)) enable_measurement_reg ( + .clk(bus_clk), .rst(bus_rst), .strobe(set_stb_bclk), .addr(set_addr_bclk), + .in(set_data_bclk), .out(en), .changed(counter_enable_changed)); // en is the 1-bit counter-enable register + + always @(posedge bus_clk) + if (set_stb_bclk) begin // readback data is refreshed only when a settings write arrives + case(rb_addr_bclk) + RB_SIGNATURE : rb_data_bclk <= 64'h712AFF1C00000000; // fixed signature word + 
RB_BUS_CLK_TICKS : rb_data_bclk <= tick_cnt_noc_shell; + RB_XBAR_TO_SHELL_XFER_CNT : rb_data_bclk <= xbar_to_shell_xfer_cnt; + RB_XBAR_TO_SHELL_PKT_CNT : rb_data_bclk <= xbar_to_shell_pkt_cnt; + RB_SHELL_TO_XBAR_XFER_CNT : rb_data_bclk <= shell_to_xbar_xfer_cnt; + RB_SHELL_TO_XBAR_PKT_CNT : rb_data_bclk <= shell_to_xbar_pkt_cnt; + RB_SHELL_TO_CE_XFER_CNT : rb_data_bclk <= shell_to_ce_xfer_cnt; + RB_SHELL_TO_CE_PKT_CNT : rb_data_bclk <= shell_to_ce_pkt_cnt; + RB_CE_TO_SHELL_XFER_CNT : rb_data_bclk <= ce_to_shell_xfer_cnt; + RB_CE_TO_SHELL_PKT_CNT : rb_data_bclk <= ce_to_shell_pkt_cnt; + default : rb_data_bclk <= 64'h0BADC0DE0BADC0DE; // marker value for unmapped readback addresses + endcase + end + + always @(posedge bus_clk) + rb_stb_bclk <= set_stb_bclk; // readback strobe follows each settings write by one bus_clk cycle + + assign counter_rst = en & counter_enable_changed; // counters are cleared when setting_reg flags a change and en is 1 + + axis_strm_monitor #(.COUNT_W(64), .PKT_COUNT_EN(1), .XFER_COUNT_EN(1)) xbar_to_shell ( + .clk(bus_clk), .reset(counter_rst), + .axis_tdata(), .axis_tlast(i_tlast & en), .axis_tvalid(i_tvalid & en), .axis_tready(i_tready & en), // handshake inputs are masked with en, so the monitor sees no activity while disabled + .xfer_count(xbar_to_shell_xfer_cnt), .pkt_count(xbar_to_shell_pkt_cnt)); + + axis_strm_monitor #(.COUNT_W(64), .PKT_COUNT_EN(1), .XFER_COUNT_EN(1)) shell_to_xbar ( + .clk(bus_clk), .reset(counter_rst), + .axis_tdata(), .axis_tlast(o_tlast & en), .axis_tvalid(o_tvalid & en), .axis_tready(o_tready & en), + .xfer_count(shell_to_xbar_xfer_cnt), .pkt_count(shell_to_xbar_pkt_cnt)); + + axis_strm_monitor #(.COUNT_W(64), .PKT_COUNT_EN(1), .XFER_COUNT_EN(1)) shell_to_ce ( + .clk(bus_clk), .reset(counter_rst), + .axis_tdata(), .axis_tlast(str_sink_tlast & en), .axis_tvalid(str_sink_tvalid & en), .axis_tready(str_sink_tready & en), + .xfer_count(shell_to_ce_xfer_cnt), .pkt_count(shell_to_ce_pkt_cnt)); + + axis_strm_monitor #(.COUNT_W(64), .PKT_COUNT_EN(1), .XFER_COUNT_EN(1)) ce_to_shell ( + .clk(bus_clk), .reset(counter_rst), + .axis_tdata(), .axis_tlast(str_src_tlast & en), .axis_tvalid(str_src_tvalid & en), .axis_tready(str_src_tready & en), + .xfer_count(ce_to_shell_xfer_cnt), 
.pkt_count(ce_to_shell_pkt_cnt)); + + // Count clock ticks + always @(posedge bus_clk) + if (counter_rst) + tick_cnt_noc_shell <= 0; + else + if (en) + tick_cnt_noc_shell <= tick_cnt_noc_shell + 1; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/null_source.v b/fpga/usrp3/lib/rfnoc/null_source.v new file mode 100644 index 000000000..f31188dee --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/null_source.v @@ -0,0 +1,99 @@ + +// Copyright 2014, Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +// Dummy data source. A packet is started from IDLE whenever the SR_ENABLE_STREAM register bit is set; SR_LINES_PER_PACKET gives the payload line count. +// Lines are offered only when line_timer is 0, which SR_LINE_RATE paces (a rate of 0 offers a line every cycle). + +module null_source + #(parameter SR_LINES_PER_PACKET = 129, // settings address: payload lines per packet + parameter SR_LINE_RATE = 130, // settings address: reload value of the line pacing timer + parameter SR_ENABLE_STREAM = 131) // settings address: stream enable bit + (input clk, input reset, input clear, + input [31:0] sid, // placed in the low 32 bits of each header word + input set_stb, input [7:0] set_addr, input [31:0] set_data, + output [63:0] o_tdata, output o_tlast, output o_tvalid, input o_tready); + + reg [11:0] seqnum; // incremented each time a header line is accepted + wire [15:0] rate; + reg [1:0] state; + reg [15:0] line_number; // NOTE(review): never referenced in this module + + wire [63:0] int_tdata; + wire int_tlast, int_tvalid, int_tready; + + wire [15:0] len; + reg [15:0] count; // payload line index within the current packet; 0 while idle or sending the header + reg [15:0] packet_count; // NOTE(review): never referenced in this module + wire enable; + + setting_reg #(.my_addr(SR_LINES_PER_PACKET), .width(16)) len_reg + (.clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(len), .changed()); + + setting_reg #(.my_addr(SR_LINE_RATE), .width(16)) rate_reg + (.clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(rate), .changed()); + + setting_reg #(.my_addr(SR_ENABLE_STREAM), .width(1)) enable_reg + (.clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(enable), .changed()); + + localparam IDLE = 2'd0; // waiting for enable + localparam HEAD = 2'd1; // offering the header line + localparam DATA = 2'd2; // offering payload lines until count reaches len + + always @(posedge clk) + if(reset | clear) begin + state <= IDLE; + count <= 0; + seqnum <= 0; + end else begin + 
case(state) + IDLE : + if(enable) + state <= HEAD; + HEAD : + if(int_tvalid & int_tready) begin + count <= 1; + state <= DATA; + seqnum <= seqnum + 1; + end + DATA : + if(int_tvalid & int_tready) + if(count >= len) begin + state <= IDLE; + count <= 0; + end + else + count <= count + 1; + default : + state <= IDLE; + endcase // case (state) + end // else: !if(reset) + + wire [15:0] pkt_len = { len[12:0], 3'b000 } + 16'd8; // length in bytes: len payload lines of 8 bytes plus the 8-byte header line + + assign int_tdata = (state == HEAD) ? { 4'b0000, seqnum, pkt_len, sid } : {~count,count,count,count} ; // header word in HEAD, a count-derived test pattern otherwise + assign int_tlast = (count >= len); // asserted once count has reached len + + reg [15:0] line_timer; + always @(posedge clk) + if(reset | clear) + line_timer <= 0; + else + if(line_timer == 0) + line_timer <= rate; // reload after each slot in which a line may be offered + else + line_timer <= line_timer - 1; + + assign int_tvalid = ((state==HEAD)|(state==DATA)) & (line_timer==0); // a line is offered only in HEAD or DATA and only when the timer is 0 + + axi_packet_gate #(.WIDTH(64), .SIZE(10)) gate + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata(int_tdata), .i_tlast(int_tlast), .i_terror(1'b0), .i_tvalid(int_tvalid), .i_tready(int_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready)); // NOTE(review): presumably buffers each packet until it is complete; confirm in axi_packet_gate + +endmodule // null_source diff --git a/fpga/usrp3/lib/rfnoc/packet_resizer.v b/fpga/usrp3/lib/rfnoc/packet_resizer.v new file mode 100644 index 000000000..5d2675df5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/packet_resizer.v @@ -0,0 +1,70 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Not necessarily that useful in general, but a good test block + +module packet_resizer + #(parameter SR_PKT_SIZE=1) // settings address of the output packet size register + (input clk, input reset, + input [15:0] next_dst_sid, // copied into the DST field of o_tuser + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input [31:0] i_tdata, input [127:0] i_tuser, input i_tlast, input i_tvalid, output i_tready, + output [31:0] o_tdata, output [127:0] o_tuser, output o_tlast, output o_tvalid, input o_tready); + + wire [15:0] pkt_size; + reg [15:0] count; + reg 
first_packet_in_burst = 1'b1; + + setting_reg #(.my_addr(SR_PKT_SIZE), .width(16)) reg_pkt_size + (.clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(pkt_size)); + + // Parse i_tuser + wire [1:0] TYPE_in = i_tuser[127:126]; + wire TSI_in = i_tuser[125]; + wire EOB_in = i_tuser[124]; + wire [11:0] SEQ_in = i_tuser[123:112]; + wire [15:0] LEN_in = i_tuser[111:96]; + wire [15:0] SRC_in = i_tuser[95:80]; + wire [15:0] DST_in = i_tuser[79:64]; + wire [63:0] TIME_in = i_tuser[63:0]; + + // Generate o_tuser + wire [1:0] TYPE_out = TYPE_in; + wire TSI_out = TSI_in & first_packet_in_burst; // TSI bit is forwarded only while first_packet_in_burst is set + wire EOB_out = EOB_in & i_tlast; // EOB is raised only on the last word of the input packet + wire [11:0] SEQ_out = SEQ_in; // Doesn't actually matter, it gets overwritten by chdr_framer + wire [15:0] LEN_out = LEN_in; // Only the bottom 2 bits actually matter, rest gets overwritten + wire [15:0] SRC_out = DST_in; // the incoming destination becomes the outgoing source + wire [15:0] DST_out = next_dst_sid; + wire [63:0] TIME_out = TIME_in; + + // Pass nearly everything through unchanged + assign o_tdata = i_tdata; + assign o_tlast = (count == pkt_size) | EOB_out; // NOTE(review): equality compare; count only takes multiples of 4, so a pkt_size that is not a multiple of 4 never matches and packets then end only on EOB + assign o_tuser = { TYPE_out, TSI_out, EOB_out, SEQ_out, LEN_out, SRC_out, DST_out, TIME_out }; + + assign o_tvalid = i_tvalid; + assign i_tready = o_tready; + + always @(posedge clk) + if(reset) + count <= 16'd4; + else + if(o_tvalid & o_tready) + if(o_tlast) + count <= 16'd4; // restart at 4 for the first word of the next output packet + else + count <= count + 16'd4; // advance by 4 per accepted 32-bit word + + always @(posedge clk) + if(reset) + first_packet_in_burst <= 1'b1; + else + if(o_tvalid & o_tready & o_tlast) + first_packet_in_burst <= EOB_out; // the next output packet is first-in-burst only if this one carried EOB + +endmodule // packet_resizer diff --git a/fpga/usrp3/lib/rfnoc/periodic_framer.v b/fpga/usrp3/lib/rfnoc/periodic_framer.v new file mode 100644 index 000000000..3e83bcf8d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/periodic_framer.v @@ -0,0 +1,151 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module periodic_framer #( + parameter 
SR_FRAME_LEN = 0, + parameter SR_GAP_LEN = 1, + parameter SR_OFFSET = 2, + parameter SR_NUMBER_SYMBOLS_MAX = 3, + parameter SR_NUMBER_SYMBOLS_SHORT = 4, + // Skip a set number of gaps at the beginning. Use 1 to properly frame 802.11 long preamble. + parameter SKIP_GAPS = 1, + parameter WIDTH = 32) // stream data width in bits +( + input clk, input reset, input clear, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input [WIDTH-1:0] stream_i_tdata, input stream_i_tlast, input stream_i_tvalid, output stream_i_tready, // an accepted input word with tlast is the trigger that starts a burst + output [WIDTH-1:0] stream_o_tdata, output stream_o_tlast, output stream_o_tvalid, input stream_o_tready, // only words consumed in ST_FRAME are forwarded; tlast marks the end of each frame + output reg sof, output reg eof); // sof is set when the first frame of a burst starts and cleared when it completes; eof is set when the last frame of a burst completes + + wire [15:0] frame_len; + wire [15:0] gap_len; + wire [15:0] offset; + + wire [15:0] numsymbols_max, numsymbols_thisburst, numsymbols_short; + wire [15:0] burst_len; // NOTE(review): declared but never used in this module + wire set_numsymbols; + wire consume; // an input word is accepted this cycle + reg [15:0] counter; // position within the current offset, frame or gap, starting at 1 + reg [$clog2(SKIP_GAPS):0] skip_cnt; // gaps skipped so far in this burst + reg [15:0] numsymbols; // index of the frame being emitted within the burst, starting at 1 + + setting_reg #(.my_addr(SR_FRAME_LEN), .width(16)) reg_frame_len ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(frame_len), .changed()); + + setting_reg #(.my_addr(SR_GAP_LEN), .width(16)) reg_gap_len ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(gap_len), .changed()); + + setting_reg #(.my_addr(SR_OFFSET), .width(16)) reg_offset ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(offset), .changed()); + + setting_reg #(.my_addr(SR_NUMBER_SYMBOLS_MAX), .width(16)) reg_max_symbols ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(numsymbols_max), .changed()); + + setting_reg #(.my_addr(SR_NUMBER_SYMBOLS_SHORT), .width(16)) reg_symbols_short ( + .clk(clk), .rst(reset), .strobe(set_stb), .addr(set_addr), .in(set_data), + .out(numsymbols_short), .changed(set_numsymbols)); // NOTE(review): set_numsymbols presumably pulses when this register is written; confirm in setting_reg + + reg [1:0] state; + localparam ST_WAIT_FOR_TRIG = 2'd0; // input is consumed and dropped until a word with tlast arrives + localparam ST_DO_OFFSET = 2'd1; // offset further words are consumed and dropped + localparam ST_FRAME = 2'd2; // frame_len words are forwarded downstream + 
localparam ST_GAP = 2'd3; // gap_len words are consumed and dropped between frames + + reg shorten_burst; // selects numsymbols_short instead of numsymbols_max for the current burst + always @(posedge clk) begin + if (reset | clear) begin + shorten_burst <= 1'b0; + end else if (set_numsymbols) begin + shorten_burst <= 1'b1; + end else if(state == ST_WAIT_FOR_TRIG) begin + shorten_burst <= 1'b0; + end + end + + assign numsymbols_thisburst = shorten_burst ? numsymbols_short : numsymbols_max; + + always @(posedge clk) begin + if (reset | clear) begin + eof <= 1'b0; + sof <= 1'b0; + counter <= 1; + skip_cnt <= 0; + numsymbols <= 16'd1; + state <= ST_WAIT_FOR_TRIG; + end else begin + if (consume) begin + case(state) + ST_WAIT_FOR_TRIG : begin + eof <= 1'b0; + skip_cnt <= 0; + if (stream_i_tlast) begin + counter <= 16'b1; + if (offset == 0) begin + // Fix: with no offset phase, start the burst exactly as ST_DO_OFFSET does on exit. + // Without this, numsymbols kept its value from the previous burst and sof was never raised. + sof <= 1'b1; + numsymbols <= 16'd1; + state <= ST_FRAME; + end else begin + state <= ST_DO_OFFSET; + end + end + end + + ST_DO_OFFSET : begin + if (counter >= offset) begin + sof <= 1'b1; + counter <= 16'b1; + numsymbols <= 16'd1; + state <= ST_FRAME; + end else begin + counter <= counter + 16'd1; + end + end + + ST_FRAME : begin + if (counter >= frame_len) begin // last word of the current frame + sof <= 1'b0; + counter <= 1; + numsymbols <= numsymbols + 1; + if (numsymbols >= numsymbols_thisburst) begin // burst complete + eof <= 1'b1; + state <= ST_WAIT_FOR_TRIG; + end else begin + if (skip_cnt < SKIP_GAPS) begin // the first SKIP_GAPS frames are followed directly by the next frame + skip_cnt <= skip_cnt + 1; + state <= ST_FRAME; + end else begin + state <= ST_GAP; + end + end + end else begin + counter <= counter + 16'd1; + end + end + + ST_GAP : begin + if (counter >= gap_len) begin + state <= ST_FRAME; + counter <= 1; + end else begin + counter <= counter + 16'd1; + end + end + endcase + end + end + end + + assign stream_o_tdata = stream_i_tdata; + assign stream_o_tlast = (state == ST_FRAME) & (counter >= frame_len); + assign stream_o_tvalid = stream_i_tvalid & (state == ST_FRAME); + + assign stream_i_tready = consume; + assign consume = stream_i_tvalid & ((state != ST_FRAME) | stream_o_tready); // outside ST_FRAME input is always accepted; inside it only when downstream is ready + +endmodule // periodic_framer diff --git a/fpga/usrp3/lib/rfnoc/phase_accum.v 
b/fpga/usrp3/lib/rfnoc/phase_accum.v new file mode 100644 index 000000000..a1c50b28a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/phase_accum.v @@ -0,0 +1,72 @@ +// +// Copyright 2015 Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Expects scaled radians fixed point input format of the form Q2.#, +// Example: WIDTH_IN=8 then input format: Q2.5 (sign bit, 2 integer bits, 5 fraction bits) +module phase_accum #( + parameter REVERSE_ROTATION = 0, // Negate phase increment value + parameter WIDTH_ACCUM = 16, // accumulator width in bits + parameter WIDTH_IN = 16, // width of the phase increment input + parameter WIDTH_OUT = 16) // output width; rounding is bypassed when equal to WIDTH_ACCUM +( + input clk, input reset, input clear, + input [WIDTH_IN-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, // i_tdata is latched as the phase increment on accepted words with i_tlast set + output [WIDTH_OUT-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready +); + + reg signed [WIDTH_ACCUM-1:0] accum, accum_next, phase_inc; // accum feeds the output; accum_next leads it by one increment and drives the rollover test + // Scaled radians. Restrict range from +1 to -1. + wire signed [WIDTH_ACCUM-1:0] POS_ROLLOVER = 2**(WIDTH_ACCUM-3); + wire signed [WIDTH_ACCUM-1:0] NEG_ROLLOVER = -(2**(WIDTH_ACCUM-3)); + + wire [WIDTH_OUT-1:0] output_round_tdata; // NOTE(review): the output_round_* wires are not referenced elsewhere in this module + wire output_round_tvalid, output_round_tready, output_round_tlast; + + // Phase accumulator, can rotate in either direction + always @(posedge clk) begin + if (reset | clear) begin + accum <= 'd0; + accum_next <= 'd0; + phase_inc <= 'd0; + end else if (i_tready & i_tvalid) begin + if (i_tlast) begin // a word with tlast restarts accum at 0 and loads a new increment + accum <= {WIDTH_ACCUM{1'b0}}; + accum_next <= REVERSE_ROTATION ? -$signed(i_tdata) : $signed(i_tdata); + phase_inc <= REVERSE_ROTATION ? 
-$signed(i_tdata) : $signed(i_tdata); + end else begin + if (accum_next >= POS_ROLLOVER) begin + accum_next <= accum_next + phase_inc - 2*POS_ROLLOVER; + accum <= accum + phase_inc - 2*POS_ROLLOVER; + end else if (accum_next <= NEG_ROLLOVER) begin + accum_next <= accum_next + phase_inc - 2*NEG_ROLLOVER; + accum <= accum + phase_inc - 2*NEG_ROLLOVER; + end else begin + accum_next <= accum_next + phase_inc; + accum <= accum + phase_inc; + end + end + end + end + + generate + // Bypass rounding if accumulator width is same as output width + if (WIDTH_ACCUM == WIDTH_OUT) begin + assign o_tdata = accum; // handshake signals are passed straight through in this branch + assign o_tvalid = i_tvalid; + assign o_tlast = i_tlast; + assign i_tready = o_tready; + end else begin + axi_round #( + .WIDTH_IN(WIDTH_ACCUM), + .WIDTH_OUT(WIDTH_OUT)) + axi_round ( + .clk(clk), .reset(reset), + .i_tdata(accum), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o_tdata(o_tdata), .o_tlast(o_tlast), .o_tvalid(o_tvalid), .o_tready(o_tready)); // axi_round carries the handshake and width conversion in this branch; clear is not connected to it + end + endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/ram_to_fifo.v b/fpga/usrp3/lib/rfnoc/ram_to_fifo.v new file mode 100644 index 000000000..2f4a4c169 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/ram_to_fifo.v @@ -0,0 +1,57 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +// Dual ported ram attached to a FIFO for readout +// Most useful for storing coefficients for windows, filters, etc. 
+// Config port is used for writing in order +// i_* (address in) and o_* (data out) ports are for streams, and can read out in arbitrary order + +module ram_to_fifo + #(parameter DWIDTH=32, // RAM data width in bits + parameter AWIDTH=10) // RAM address width in bits + (input clk, input reset, input clear, // clear acts like reset on the write pointer and the output flags + // FIXME add writing port + input [DWIDTH-1:0] config_tdata, input config_tlast, input config_tvalid, output config_tready, + input [AWIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [DWIDTH-1:0] o_tdata, output reg o_tlast, output reg o_tvalid, input o_tready); + + // Write side + reg [AWIDTH-1:0] write_addr; + + assign config_tready = 1'b1; // config words are always accepted + + always @(posedge clk) + if(reset | clear) + write_addr <= 0; + else + if(config_tvalid & config_tready) + if(config_tlast) + write_addr <= 0; // tlast on the config stream rewinds the write pointer + else + write_addr <= write_addr + 1; + + ram_2port #(.DWIDTH(DWIDTH), .AWIDTH(AWIDTH)) ram_2port + (.clka(clk), .ena(1'b1), .wea(config_tvalid), .addra(write_addr), .dia(config_tdata), .doa(), // Write port + .clkb(clk), .enb(i_tready & i_tvalid), .web(1'b0), .addrb(i_tdata), .dib({DWIDTH{1'b1}}), .dob(o_tdata)); // Read port + + // Read side + assign i_tready = ~o_tvalid | o_tready; // take a new address when the output slot is empty or is being read this cycle + + always @(posedge clk) + if(reset | clear) + begin + o_tvalid <= 1'b0; + o_tlast <= 1'b0; + end + else + begin + o_tvalid <= (i_tready & i_tvalid) | (o_tvalid & ~o_tready); // valid the cycle after an address is accepted and held while the consumer stalls; assumes ram_2port has a one-cycle read latency, TODO confirm + if(i_tready & i_tvalid) + o_tlast <= i_tlast; // tlast travels with its address + end + +endmodule // ram_to_fifo diff --git a/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/Makefile b/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/Makefile new file mode 100644 index 000000000..71d2841f9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/Makefile @@ -0,0 +1,45 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir 
+BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies (each source list is included once) +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +SIM_TOP = axis_pyld_ctxt_converter_tb + +SIM_SRCS = \ +$(abspath axis_pyld_ctxt_converter_tb.sv) \ + +# MODELSIM_USER_DO = $(abspath wave.do) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/axis_pyld_ctxt_converter_tb.sv b/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/axis_pyld_ctxt_converter_tb.sv new file mode 100644 index 000000000..c8b50c15a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/axis_pyld_ctxt_converter_tb/axis_pyld_ctxt_converter_tb.sv @@ -0,0 +1,465 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_pyld_ctxt_converter_tb +// + +`default_nettype none + + +module axis_pyld_ctxt_converter_tb; + + // ---------------------------------------- + // Global settings + // ---------------------------------------- + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgAxiStreamBfm::*; + import PkgChdrUtils::*; + import PkgChdrBfm::*; + + // Parameters + localparam bit VERBOSE = 0; + localparam int CHDR_W = 64; + localparam int MTU = 7; + localparam int MTU_BITS = (1 << MTU) * CHDR_W; + localparam int NINST = 6; + localparam int START_INST = 0; + localparam int STOP_INST = NINST-1; + localparam int NUM_PKTS_PER_TEST = 100; + localparam int FAST_STALL_PROB = 0; + localparam int SLOW_STALL_PROB = 35; + localparam realtime CHDR_CLK_PERIOD = 3.0; + localparam int MAX_PYLD_W = 256; + + typedef struct { + realtime clk_period; + int item_w; + int nipc; + int ctxt_fifo; + int pyld_fifo; + bit prefetch; + } inst_params_t; + + // Module instances to test + localparam inst_params_t INST_PARAMS[0:NINST-1] = { + '{clk_period: 6.0, item_w:64, nipc: 1, ctxt_fifo:5, pyld_fifo:7, prefetch:1}, + '{clk_period:20.0, item_w:32, nipc: 6, ctxt_fifo:5, pyld_fifo:1, prefetch:1}, + '{clk_period: 3.0, item_w:32, nipc: 4, ctxt_fifo:1, pyld_fifo:2, prefetch:0}, + '{clk_period:10.0, item_w:16, nipc: 4, ctxt_fifo:8, pyld_fifo:5, prefetch:1}, + 
'{clk_period: 3.0, item_w:32, nipc: 2, ctxt_fifo:1, pyld_fifo:7, prefetch:0}, + '{clk_period: 3.0, item_w:8, nipc:13, ctxt_fifo:1, pyld_fifo:7, prefetch:0} + }; + + // ---------------------------------------- + // Interfaces and clocks + // ---------------------------------------- + + // Clocks and resets + bit rfnoc_chdr_clk, rfnoc_chdr_rst; + bit [NINST-1:0] rfnoc_data_clk, rfnoc_data_rst; + // Common CHDR Clock + sim_clock_gen #(CHDR_CLK_PERIOD) chdr_clk_gen_i (rfnoc_chdr_clk, rfnoc_chdr_rst); + + // Flush interface + logic [31:0] r2c_framer_errors[0:NINST-1]; + logic [31:0] r2c_flush_timeout[0:NINST-1], c2r_flush_timeout[0:NINST-1]; + logic [0:0] r2c_flush_en [0:NINST-1], c2r_flush_en [0:NINST-1]; + wire [0:0] r2c_flush_active [0:NINST-1], c2r_flush_active [0:NINST-1]; + wire [0:0] r2c_flush_done [0:NINST-1], c2r_flush_done [0:NINST-1]; + + // CHDR interface + wire [CHDR_W-1:0] chdr_tdata [0:NINST-1]; + wire chdr_tlast [0:NINST-1]; + wire chdr_tvalid[0:NINST-1]; + wire chdr_tready[0:NINST-1]; + + // AXIS interfaces and BFMs + AxiStreamIf #(CHDR_W, 4) r2c_ctxt [0:NINST-1] (); + AxiStreamIf #(CHDR_W, 4) c2r_ctxt [0:NINST-1] (); + AxiStreamIf #(MAX_PYLD_W) r2c_pyld [0:NINST-1] (); + AxiStreamIf #(MAX_PYLD_W) c2r_pyld [0:NINST-1] (); + AxiStreamBfm #(CHDR_W, 4) ctxt_bfm [0:NINST-1] ; + AxiStreamBfm #(MAX_PYLD_W) pyld_bfm [0:NINST-1] ; + + // Instantiate DUTs + genvar inst_i; + generate for (inst_i = 0; inst_i < NINST; inst_i++) begin: inst + + // Assign clocks and resets to ctxt and pyld streams + assign r2c_ctxt[inst_i].clk = rfnoc_data_clk[inst_i]; + assign r2c_ctxt[inst_i].rst = rfnoc_data_rst[inst_i]; + assign c2r_ctxt[inst_i].clk = rfnoc_data_clk[inst_i]; + assign c2r_ctxt[inst_i].rst = rfnoc_data_rst[inst_i]; + + assign r2c_pyld[inst_i].clk = rfnoc_data_clk[inst_i]; + assign r2c_pyld[inst_i].rst = rfnoc_data_rst[inst_i]; + assign c2r_pyld[inst_i].clk = rfnoc_data_clk[inst_i]; + assign c2r_pyld[inst_i].rst = rfnoc_data_rst[inst_i]; + + // Instantiate clock 
generator + sim_clock_gen #(INST_PARAMS[inst_i].clk_period) dclk_gen ( + rfnoc_data_clk[inst_i], rfnoc_data_rst[inst_i] + ); + + // Instantiate PyldCtxt to Chdr DUT + axis_pyld_ctxt_to_chdr #( + .CHDR_W (CHDR_W), + .ITEM_W (INST_PARAMS[inst_i].item_w), + .NIPC (INST_PARAMS[inst_i].nipc), + .SYNC_CLKS (INST_PARAMS[inst_i].clk_period == CHDR_CLK_PERIOD), + .CONTEXT_FIFO_SIZE (INST_PARAMS[inst_i].ctxt_fifo), + .PAYLOAD_FIFO_SIZE (INST_PARAMS[inst_i].pyld_fifo), + .MTU (MTU), + .CONTEXT_PREFETCH_EN (INST_PARAMS[inst_i].prefetch) + ) r2c_dut ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (rfnoc_data_clk[inst_i]), + .axis_data_rst (rfnoc_data_rst[inst_i]), + .m_axis_chdr_tdata (chdr_tdata[inst_i]), + .m_axis_chdr_tlast (chdr_tlast[inst_i]), + .m_axis_chdr_tvalid (chdr_tvalid[inst_i]), + .m_axis_chdr_tready (chdr_tready[inst_i]), + .s_axis_payload_tdata (r2c_pyld[inst_i].slave.tdata[(INST_PARAMS[inst_i].item_w*INST_PARAMS[inst_i].nipc)-1:0]), + .s_axis_payload_tkeep (r2c_pyld[inst_i].slave.tkeep[INST_PARAMS[inst_i].nipc-1:0]), + .s_axis_payload_tlast (r2c_pyld[inst_i].slave.tlast), + .s_axis_payload_tvalid(r2c_pyld[inst_i].slave.tvalid), + .s_axis_payload_tready(r2c_pyld[inst_i].slave.tready), + .s_axis_context_tdata (r2c_ctxt[inst_i].slave.tdata), + .s_axis_context_tuser (r2c_ctxt[inst_i].slave.tuser), + .s_axis_context_tlast (r2c_ctxt[inst_i].slave.tlast), + .s_axis_context_tvalid(r2c_ctxt[inst_i].slave.tvalid), + .s_axis_context_tready(r2c_ctxt[inst_i].slave.tready), + .framer_errors (r2c_framer_errors[inst_i]), + .flush_en (r2c_flush_en[inst_i]), + .flush_timeout (r2c_flush_timeout[inst_i]), + .flush_active (r2c_flush_active[inst_i]), + .flush_done (r2c_flush_done[inst_i]) + ); + + // Instantiate Chdr to PyldCtxt DUT + chdr_to_axis_pyld_ctxt #( + .CHDR_W (CHDR_W), + .ITEM_W (INST_PARAMS[inst_i].item_w), + .NIPC (INST_PARAMS[inst_i].nipc), + .SYNC_CLKS (INST_PARAMS[inst_i].clk_period == CHDR_CLK_PERIOD), + .CONTEXT_FIFO_SIZE 
(INST_PARAMS[inst_i].ctxt_fifo), + .PAYLOAD_FIFO_SIZE (INST_PARAMS[inst_i].pyld_fifo), + .CONTEXT_PREFETCH_EN (INST_PARAMS[inst_i].prefetch) + ) c2r_dut ( + .axis_chdr_clk (rfnoc_chdr_clk), + .axis_chdr_rst (rfnoc_chdr_rst), + .axis_data_clk (rfnoc_data_clk[inst_i]), + .axis_data_rst (rfnoc_data_rst[inst_i]), + .s_axis_chdr_tdata (chdr_tdata[inst_i]), + .s_axis_chdr_tlast (chdr_tlast[inst_i]), + .s_axis_chdr_tvalid (chdr_tvalid[inst_i]), + .s_axis_chdr_tready (chdr_tready[inst_i]), + .m_axis_payload_tdata (c2r_pyld[inst_i].master.tdata[(INST_PARAMS[inst_i].item_w*INST_PARAMS[inst_i].nipc)-1:0]), + .m_axis_payload_tkeep (c2r_pyld[inst_i].master.tkeep[INST_PARAMS[inst_i].nipc-1:0]), + .m_axis_payload_tlast (c2r_pyld[inst_i].master.tlast), + .m_axis_payload_tvalid(c2r_pyld[inst_i].master.tvalid), + .m_axis_payload_tready(c2r_pyld[inst_i].master.tready), + .m_axis_context_tdata (c2r_ctxt[inst_i].master.tdata), + .m_axis_context_tuser (c2r_ctxt[inst_i].master.tuser), + .m_axis_context_tlast (c2r_ctxt[inst_i].master.tlast), + .m_axis_context_tvalid(c2r_ctxt[inst_i].master.tvalid), + .m_axis_context_tready(c2r_ctxt[inst_i].master.tready), + .flush_en (c2r_flush_en[inst_i]), + .flush_timeout (c2r_flush_timeout[inst_i]), + .flush_active (c2r_flush_active[inst_i]), + .flush_done (c2r_flush_done[inst_i]) + ); + + // Assert Reset and start BFMs + initial begin + dclk_gen.reset(); + r2c_flush_en[inst_i] = 0; + c2r_flush_en[inst_i] = 0; + pyld_bfm[inst_i] = new(r2c_pyld[inst_i], c2r_pyld[inst_i]); + pyld_bfm[inst_i].run(); + ctxt_bfm[inst_i] = new(r2c_ctxt[inst_i], c2r_ctxt[inst_i]); + ctxt_bfm[inst_i].run(); + end + end endgenerate + + function automatic bit pyld_pkts_equal( + ref AxiStreamPacket #(MAX_PYLD_W) exp, + ref AxiStreamPacket #(MAX_PYLD_W) act, + input int item_w, + input int nipc + ); + if (exp.data.size() != act.data.size()) return 0; + if (exp.keep.size() != act.keep.size()) return 0; + for (int i = 0; i < exp.data.size(); i++) begin + // Convert to bit + 
automatic bit [MAX_PYLD_W-1:0] mask = '0; + automatic bit [MAX_PYLD_W-1:0] data_exp = exp.data[i]; + automatic bit [MAX_PYLD_W-1:0] data_act = act.data[i]; + for (int r = 0; r < nipc; r++) begin + if (exp.keep[i][r] === 1'b1) begin + automatic bit [MAX_PYLD_W-1:0] samp_mask = ((1<<item_w)-1); + mask |= (samp_mask << (r*item_w)); + end + end + if (exp.keep[i] !== act.keep[i]) return 0; + if ((data_exp&mask) !== (data_act&mask)) return 0; + end + return 1; + endfunction + + task automatic send_recv_data_packets( + input int inst, + input inst_params_t params, //We pass this separately to work around Vivado bug + input int num_pkts, + input int mst_stall_prob, + input int slv_stall_prob, + input bit flushing = 0 + ); + int nipc = params.nipc; + int item_w = params.item_w; + bit prefetch = params.prefetch; + + AxiStreamPacket #(MAX_PYLD_W) pyld_pkt_arr[$] = {}; + AxiStreamPacket #(CHDR_W, 4) ctxt_pkt_arr[$] = {}; + + // Set stall probabilities + ctxt_bfm[inst].set_master_stall_prob(mst_stall_prob); + ctxt_bfm[inst].set_slave_stall_prob(slv_stall_prob); + pyld_bfm[inst].set_master_stall_prob(mst_stall_prob); + pyld_bfm[inst].set_slave_stall_prob(slv_stall_prob); + + // Generate a stream of data packets + for (int p = 0; p < num_pkts; p++) begin + int len_lines = $urandom_range((MTU_BITS/(item_w*nipc))-10, 1); + int keep_int = $urandom_range(nipc, 1); + pyld_pkt_arr[p] = new(); + for (int i = 0; i < len_lines; i++) begin + logic [MAX_PYLD_W-1:0] rand_samp; + logic [(MAX_PYLD_W/8)-1:0] keep_val = 'x; + for (int r = 0; r < (((nipc*item_w)+31)/32); r++) + rand_samp[r*32 +: 32] = $urandom(); + pyld_pkt_arr[p].data.push_back(rand_samp); + pyld_pkt_arr[p].user.push_back('x); + for (int r = 0; r < nipc; r++) begin + if (i == len_lines-1) + keep_val[r] = (r < keep_int) ? 
1'b1 : 1'b0; + else + keep_val[r] = 1'b1; + end + pyld_pkt_arr[p].keep.push_back(keep_val); + end + end + + // Generate context packet for each data packet + foreach (pyld_pkt_arr[p]) begin + automatic chdr_header_t chdr_hdr; + automatic bit has_time = $urandom_range(1); + automatic int num_mdata = $urandom_range(5); + automatic int num_pyld_lines = pyld_pkt_arr[p].data.size(); + automatic int invalid_samps = 0; + automatic int length; + for (int r = 0; r < nipc; r++) + if (pyld_pkt_arr[p].keep[num_pyld_lines-1][r] === 1'b0) + invalid_samps++; + length = + (CHDR_W/8) + // header + ((has_time && (CHDR_W == 64)) ? (CHDR_W/8) : 0) + // timestamp + (num_mdata * (CHDR_W/8)) + // metadata + (num_pyld_lines * nipc * (item_w/8)) + // payload + (-invalid_samps * (item_w/8)); // payload (back out empty slots) + + chdr_hdr = '{ + vc : $urandom_range(63), + eob : $urandom_range(1), + eov : $urandom_range(1), + pkt_type : has_time ? CHDR_DATA_WITH_TS : CHDR_DATA_NO_TS, + num_mdata : num_mdata, + seq_num : p, + length : length, + dst_epid : $urandom() + }; + + ctxt_pkt_arr[p] = new(); + ctxt_pkt_arr[p].data.push_back(chdr_hdr); + ctxt_pkt_arr[p].user.push_back((has_time && (CHDR_W > 64)) ? 
CONTEXT_FIELD_HDR_TS : CONTEXT_FIELD_HDR); + ctxt_pkt_arr[p].keep.push_back('x); + if (has_time && (CHDR_W == 64)) begin + ctxt_pkt_arr[p].data.push_back(~p); + ctxt_pkt_arr[p].user.push_back(CONTEXT_FIELD_TS); + ctxt_pkt_arr[p].keep.push_back('x); + end + for (int i = 0; i < num_mdata; i++) begin + ctxt_pkt_arr[p].data.push_back(i); + ctxt_pkt_arr[p].user.push_back(CONTEXT_FIELD_MDATA); + ctxt_pkt_arr[p].keep.push_back('x); + end + end + + // Spin up 4 threads: {RX, TX} x {Context, Payload} + fork + begin: tx_context + timeout_t timeout; + for (int p = 0; p < num_pkts; p++) begin + test.start_timeout(timeout, 50us, "Waiting to send TX context pkt"); + ctxt_bfm[inst].put(ctxt_pkt_arr[p].copy()); + test.end_timeout(timeout); + if (VERBOSE) $display("[INST%0d:TxContext:%0d]\n%s", inst, p, ctxt_pkt_arr[p].sprint()); + end + end + begin: tx_payload + timeout_t timeout; + for (int p = 0; p < num_pkts; p++) begin + test.start_timeout(timeout, 50us, "Waiting to send TX payload pkt"); + pyld_bfm[inst].put(pyld_pkt_arr[p].copy()); + test.end_timeout(timeout); + if (VERBOSE) $display("[INST%0d:TxPayload:%0d]\n%s", inst, p, pyld_pkt_arr[p].sprint()); + end + end + begin: rx_context + if (!flushing) begin + timeout_t timeout; + automatic AxiStreamPacket #(CHDR_W, 4) rx_ctxt_pkt; + for (int p = 0; p < num_pkts; p++) begin + test.start_timeout(timeout, 50us, "Waiting to recv RX context pkt"); + ctxt_bfm[inst].get(rx_ctxt_pkt); + test.end_timeout(timeout); + if (VERBOSE) $display("[INST%0d:RxContext:%0d]\n%s", inst, p, rx_ctxt_pkt.sprint()); + if (VERBOSE) $display("[INST%0d:ExpContext:%0d]\n%s", inst, p, ctxt_pkt_arr[p].sprint()); + `ASSERT_ERROR(ctxt_pkt_arr[p].equal(rx_ctxt_pkt), "RX context packet did not match TX"); + end + end + end + begin: rx_payload + if (!flushing) begin + timeout_t timeout; + automatic AxiStreamPacket #(MAX_PYLD_W) rx_pyld_pkt; + for (int p = 0; p < num_pkts; p++) begin + test.start_timeout(timeout, 50us, "Waiting to recv RX payload pkt"); + 
pyld_bfm[inst].get(rx_pyld_pkt); + test.end_timeout(timeout); + if (VERBOSE) $display("[INST%0d:RxPayload:%0d]\n%s", inst, p, rx_pyld_pkt.sprint()); + if (VERBOSE) $display("[INST%0d:ExpPayload:%0d]\n%s", inst, p, pyld_pkt_arr[p].sprint()); + `ASSERT_ERROR(pyld_pkts_equal(pyld_pkt_arr[p], rx_pyld_pkt, item_w, nipc), "RX payload packet did not match TX"); + end + end + end + join + endtask + + + // ---------------------------------------- + // Test Process + // ---------------------------------------- + initial begin + + // Shared Variables + // ---------------------------------------- + timeout_t timeout; + string tc_label; + + // Initialize + // ---------------------------------------- + test.start_tb("axis_pyld_ctxt_converter_tb"); + + // Reset + // ---------------------------------------- + chdr_clk_gen_i.reset(); + + test.start_test("Wait for reset"); + test.start_timeout(timeout, 1us, "Waiting for reset"); + while (rfnoc_chdr_rst) @(posedge rfnoc_chdr_clk); + while (|rfnoc_data_rst) @(posedge rfnoc_chdr_clk); + repeat (100) @(posedge rfnoc_chdr_clk); + test.end_timeout(timeout); + `ASSERT_ERROR(!rfnoc_chdr_rst && !(|rfnoc_data_rst), "Reset did not deassert"); + test.end_test(); + + for (int inst_num = START_INST; inst_num <= STOP_INST; inst_num++) begin + $display("-----------------------------------------------------------------------------------------------"); + $display("Testing INST%0d:%p", inst_num, INST_PARAMS[inst_num]); + $display("-----------------------------------------------------------------------------------------------"); + + // Stream Random Data + // ---------------------------------------- + for (int cfg = 0; cfg < 4; cfg++) begin + automatic integer mst_cfg = cfg[0]; + automatic integer slv_cfg = cfg[1]; + $sformat(tc_label, "INST%0d: Stream Random Data (%s Mst, %s Slv)", + inst_num,(mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + send_recv_data_packets(inst_num, INST_PARAMS[inst_num], NUM_PKTS_PER_TEST, + 
mst_cfg ? SLOW_STALL_PROB : FAST_STALL_PROB, + slv_cfg ? SLOW_STALL_PROB : FAST_STALL_PROB + ); + `ASSERT_ERROR(r2c_framer_errors[inst_num] === '0, "Encountered framer errors"); + test.end_test(); + end + + // Flush + // ---------------------------------------- + $sformat(tc_label, "INST%0d: Flush PyldCtxt => CHDR (Idle)", inst_num); + test.start_test(tc_label); + r2c_flush_timeout[inst_num] = $urandom_range(400, 200); + r2c_flush_en[inst_num] = 1'b1; + repeat (100) @(posedge rfnoc_chdr_clk); + `ASSERT_ERROR(r2c_flush_active[inst_num] === 1, "Flushing did not begin on time"); + `ASSERT_ERROR(r2c_flush_done[inst_num] === 0, "Flushing ended prematurely"); + repeat (r2c_flush_timeout[inst_num] + 1) @(posedge rfnoc_chdr_clk); + `ASSERT_ERROR(r2c_flush_done[inst_num] === 1, "Flushing did not end on time"); + r2c_flush_en[inst_num] = 1'b0; + @(posedge rfnoc_chdr_clk); + test.end_test(); + + $sformat(tc_label, "INST%0d: Flush CHDR => PyldCtxt (Idle)", inst_num); + test.start_test(tc_label); + c2r_flush_timeout[inst_num] = $urandom_range(400, 200); + c2r_flush_en[inst_num] = 1'b1; + repeat (100) @(posedge rfnoc_data_clk[inst_num]); + `ASSERT_ERROR(c2r_flush_active[inst_num] === 1, "Flushing did not begin on time"); + `ASSERT_ERROR(c2r_flush_done[inst_num] === 0, "Flushing ended prematurely"); + repeat (c2r_flush_timeout[inst_num] + 1) @(posedge rfnoc_data_clk[inst_num]); + `ASSERT_ERROR(c2r_flush_done[inst_num] === 1, "Flushing did not end on time"); + c2r_flush_en[inst_num] = 1'b0; + @(posedge rfnoc_data_clk[inst_num]); + test.end_test(); + + $sformat(tc_label, "INST%0d: Flush PyldCtxt => CHDR (Streaming)", inst_num); + test.start_test(tc_label); + r2c_flush_timeout[inst_num] = $urandom_range(400, 200); + r2c_flush_en[inst_num] = 1'b1; + repeat (100) @(posedge rfnoc_chdr_clk); + `ASSERT_ERROR(r2c_flush_active[inst_num] === 1, "Flushing did not begin on time"); + `ASSERT_ERROR(r2c_flush_done[inst_num] === 0, "Flushing ended prematurely"); + send_recv_data_packets(inst_num, 
INST_PARAMS[inst_num], NUM_PKTS_PER_TEST/10, + FAST_STALL_PROB, FAST_STALL_PROB, 1 /*flushing*/ + ); + repeat (NUM_PKTS_PER_TEST/10 * (1<<MTU) * 4) @(posedge rfnoc_chdr_clk); + repeat (r2c_flush_timeout[inst_num] + 1) @(posedge rfnoc_chdr_clk); + `ASSERT_ERROR(r2c_flush_done[inst_num] === 1, "Flushing did not end on time"); + r2c_flush_en[inst_num] = 1'b0; + @(posedge rfnoc_chdr_clk); + test.end_test(); + + $sformat(tc_label, "INST%0d: Stream Data After Flush", inst_num); + test.start_test(tc_label); + send_recv_data_packets(inst_num, INST_PARAMS[inst_num], NUM_PKTS_PER_TEST/10, + FAST_STALL_PROB, FAST_STALL_PROB + ); + `ASSERT_ERROR(r2c_framer_errors[inst_num] === '0, "Encountered framer errors"); + test.end_test(); + end + + // Finish Up + // ---------------------------------------- + // Display final statistics and results + test.end_tb(); + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/Makefile b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/Makefile new file mode 100644 index 000000000..b2773db02 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/Makefile @@ -0,0 +1,44 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ 
+$(RFNOC_XBAR_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +SIM_TOP = chdr_stream_endpoint_tb + +SIM_SRCS = \ +$(abspath lossy_xport_model.v) \ +$(abspath chdr_stream_endpoint_tb.sv) \ + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/chdr_stream_endpoint_tb.sv b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/chdr_stream_endpoint_tb.sv new file mode 100644 index 000000000..0626ee447 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/chdr_stream_endpoint_tb.sv @@ -0,0 +1,1149 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_stream_endpoint_tb +// + +`default_nettype none + + +module chdr_stream_endpoint_tb; + + // ---------------------------------------- + // Global settings + // ---------------------------------------- + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgChdrBfm::*; + + // Clocks and resets + bit rfnoc_chdr_clk, rfnoc_chdr_rst; + bit rfnoc_ctrl_clk, rfnoc_ctrl_rst; + sim_clock_gen #(6.0) rfnoc_chdr_clk_gen (rfnoc_chdr_clk, rfnoc_chdr_rst); // 166.6 MHz + sim_clock_gen #(20.0) rfnoc_ctrl_clk_gen (rfnoc_ctrl_clk, rfnoc_ctrl_rst); // 50 MHz + + // Parameters + localparam bit VERBOSE = 0; + localparam int NUM_PKTS_PER_TEST = 200; + localparam int FAST_STALL_PROB = 0; + localparam int SLOW_STALL_PROB = 35; + + localparam int CHDR_W = 64; + localparam int MTU = 7; + localparam [15:0] PROTOVER = {8'd1, 
8'd0}; + localparam [15:0] DEV_ID = 16'hBEEF; + localparam [15:0] EPID_TB = 16'h1001; + localparam [15:0] EPID_A = 16'h1002; + localparam [15:0] EPID_B = 16'h1003; + localparam [9:0] PORT_TB = 10'd0; + localparam [9:0] PORT_A = 10'd1; + localparam [9:0] PORT_B = 10'd2; + + // ---------------------------------------- + // DUT (and Crossbar) Instantiations + // ---------------------------------------- + wire [CHDR_W-1:0] c2ae_chdr_tdata , c2ax_chdr_tdata , a2c_chdr_tdata ; + wire c2ae_chdr_tlast , c2ax_chdr_tlast , a2c_chdr_tlast ; + wire c2ae_chdr_tvalid, c2ax_chdr_tvalid, a2c_chdr_tvalid; + wire c2ae_chdr_tready, c2ax_chdr_tready, a2c_chdr_tready; + wire [CHDR_W-1:0] c2be_chdr_tdata , c2bx_chdr_tdata , b2c_chdr_tdata ; + wire c2be_chdr_tlast , c2bx_chdr_tlast , b2c_chdr_tlast ; + wire c2be_chdr_tvalid, c2bx_chdr_tvalid, b2c_chdr_tvalid; + wire c2be_chdr_tready, c2bx_chdr_tready, b2c_chdr_tready; + + wire [31:0] a_ctrl_in_tdata, a_ctrl_out_tdata, b_ctrl_in_tdata, b_ctrl_out_tdata; + wire a_ctrl_loop_tlast , b_ctrl_loop_tlast ; + wire a_ctrl_loop_tvalid, b_ctrl_loop_tvalid; + wire a_ctrl_loop_tready, b_ctrl_loop_tready; + + logic a_signal_data_err, b_signal_data_err; + logic a_lossy_input, b_lossy_input; + logic [7:0] a_seqerr_prob, b_seqerr_prob; + logic [7:0] a_rterr_prob, b_rterr_prob; + + AxiStreamIf #(CHDR_W) m_tb_chdr (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) s_tb_chdr (rfnoc_chdr_clk, rfnoc_chdr_rst); + + AxiStreamIf #(CHDR_W) m_a0_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) s_a0_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) m_a1_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) s_a1_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) m_b0_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) s_b0_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) m_b1_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + AxiStreamIf #(CHDR_W) s_b1_data (rfnoc_chdr_clk, rfnoc_chdr_rst); + + 
chdr_stream_endpoint #( + .PROTOVER (PROTOVER), + .CHDR_W (CHDR_W), + .AXIS_CTRL_EN (1), + .AXIS_DATA_EN (1), + .INST_NUM (0), + .NUM_DATA_I (2), + .NUM_DATA_O (2), + .CTRL_XBAR_PORT (PORT_A), + .INGRESS_BUFF_SIZE (MTU+1), + .MTU (MTU), + .REPORT_STRM_ERRS (1), + .SIM_SPEEDUP (1) + ) sep_a ( + .rfnoc_chdr_clk (rfnoc_chdr_clk ), + .rfnoc_chdr_rst (rfnoc_chdr_rst ), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .device_id (DEV_ID ), + .s_axis_chdr_tdata (c2ae_chdr_tdata ), + .s_axis_chdr_tlast (c2ae_chdr_tlast ), + .s_axis_chdr_tvalid (c2ae_chdr_tvalid ), + .s_axis_chdr_tready (c2ae_chdr_tready ), + .m_axis_chdr_tdata (a2c_chdr_tdata ), + .m_axis_chdr_tlast (a2c_chdr_tlast ), + .m_axis_chdr_tvalid (a2c_chdr_tvalid ), + .m_axis_chdr_tready (a2c_chdr_tready ), + .s_axis_data_tdata ({m_a1_data.slave.tdata , m_a0_data.slave.tdata }), + .s_axis_data_tlast ({m_a1_data.slave.tlast , m_a0_data.slave.tlast }), + .s_axis_data_tvalid ({m_a1_data.slave.tvalid , m_a0_data.slave.tvalid }), + .s_axis_data_tready ({m_a1_data.slave.tready , m_a0_data.slave.tready }), + .m_axis_data_tdata ({s_a1_data.master.tdata , s_a0_data.master.tdata }), + .m_axis_data_tlast ({s_a1_data.master.tlast , s_a0_data.master.tlast }), + .m_axis_data_tvalid ({s_a1_data.master.tvalid, s_a0_data.master.tvalid}), + .m_axis_data_tready ({s_a1_data.master.tready, s_a0_data.master.tready}), + .s_axis_ctrl_tdata (a_ctrl_out_tdata ), + .s_axis_ctrl_tlast (a_ctrl_loop_tlast ), + .s_axis_ctrl_tvalid (a_ctrl_loop_tvalid ), + .s_axis_ctrl_tready (a_ctrl_loop_tready ), + .m_axis_ctrl_tdata (a_ctrl_in_tdata ), + .m_axis_ctrl_tlast (a_ctrl_loop_tlast ), + .m_axis_ctrl_tvalid (a_ctrl_loop_tvalid ), + .m_axis_ctrl_tready (a_ctrl_loop_tready ), + .strm_seq_err_stb ( ), + .strm_data_err_stb ( ), + .strm_route_err_stb ( ), + .signal_data_err (a_signal_data_err ) + ); + + chdr_stream_endpoint #( + .PROTOVER (PROTOVER), + .CHDR_W (CHDR_W), + .AXIS_CTRL_EN (1), + .AXIS_DATA_EN (1), + .INST_NUM (1), 
+ .NUM_DATA_I (2), + .NUM_DATA_O (2), + .CTRL_XBAR_PORT (PORT_B), + .INGRESS_BUFF_SIZE (MTU+1), + .MTU (MTU), + .REPORT_STRM_ERRS (1), + .SIM_SPEEDUP (1) + ) sep_b ( + .rfnoc_chdr_clk (rfnoc_chdr_clk ), + .rfnoc_chdr_rst (rfnoc_chdr_rst ), + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .device_id (DEV_ID ), + .s_axis_chdr_tdata (c2be_chdr_tdata ), + .s_axis_chdr_tlast (c2be_chdr_tlast ), + .s_axis_chdr_tvalid (c2be_chdr_tvalid ), + .s_axis_chdr_tready (c2be_chdr_tready ), + .m_axis_chdr_tdata (b2c_chdr_tdata ), + .m_axis_chdr_tlast (b2c_chdr_tlast ), + .m_axis_chdr_tvalid (b2c_chdr_tvalid ), + .m_axis_chdr_tready (b2c_chdr_tready ), + .s_axis_data_tdata ({m_b1_data.slave.tdata , m_b0_data.slave.tdata }), + .s_axis_data_tlast ({m_b1_data.slave.tlast , m_b0_data.slave.tlast }), + .s_axis_data_tvalid ({m_b1_data.slave.tvalid , m_b0_data.slave.tvalid }), + .s_axis_data_tready ({m_b1_data.slave.tready , m_b0_data.slave.tready }), + .m_axis_data_tdata ({s_b1_data.master.tdata , s_b0_data.master.tdata }), + .m_axis_data_tlast ({s_b1_data.master.tlast , s_b0_data.master.tlast }), + .m_axis_data_tvalid ({s_b1_data.master.tvalid, s_b0_data.master.tvalid}), + .m_axis_data_tready ({s_b1_data.master.tready, s_b0_data.master.tready}), + .s_axis_ctrl_tdata (b_ctrl_out_tdata ), + .s_axis_ctrl_tlast (b_ctrl_loop_tlast ), + .s_axis_ctrl_tvalid (b_ctrl_loop_tvalid ), + .s_axis_ctrl_tready (b_ctrl_loop_tready ), + .m_axis_ctrl_tdata (b_ctrl_in_tdata ), + .m_axis_ctrl_tlast (b_ctrl_loop_tlast ), + .m_axis_ctrl_tvalid (b_ctrl_loop_tvalid ), + .m_axis_ctrl_tready (b_ctrl_loop_tready ), + .strm_seq_err_stb ( ), + .strm_data_err_stb ( ), + .strm_route_err_stb ( ), + .signal_data_err (b_signal_data_err ) + ); + + chdr_crossbar_nxn #( + .CHDR_W (CHDR_W), + .NPORTS (3), + .DEFAULT_PORT (0), + .MTU (MTU), + .ROUTE_TBL_SIZE (6), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE ("AREA"), + .NPORTS_MGMT (1), + .EXT_RTCFG_PORT (0), + .PROTOVER (PROTOVER) + ) xbar_c ( + .clk 
(rfnoc_chdr_clk), + .reset (rfnoc_chdr_rst), + .device_id (DEV_ID), + .s_axis_tdata ({b2c_chdr_tdata, a2c_chdr_tdata, m_tb_chdr.slave.tdata }), + .s_axis_tlast ({b2c_chdr_tlast, a2c_chdr_tlast, m_tb_chdr.slave.tlast }), + .s_axis_tvalid ({b2c_chdr_tvalid, a2c_chdr_tvalid, m_tb_chdr.slave.tvalid }), + .s_axis_tready ({b2c_chdr_tready, a2c_chdr_tready, m_tb_chdr.slave.tready }), + .m_axis_tdata ({c2bx_chdr_tdata, c2ax_chdr_tdata, s_tb_chdr.master.tdata }), + .m_axis_tlast ({c2bx_chdr_tlast, c2ax_chdr_tlast, s_tb_chdr.master.tlast }), + .m_axis_tvalid ({c2bx_chdr_tvalid, c2ax_chdr_tvalid, s_tb_chdr.master.tvalid}), + .m_axis_tready ({c2bx_chdr_tready, c2ax_chdr_tready, s_tb_chdr.master.tready}), + .ext_rtcfg_stb ('0), + .ext_rtcfg_addr ('0), + .ext_rtcfg_data ('0), + .ext_rtcfg_ack () + ); + + lossy_xport_model #( .CHDR_W(CHDR_W) ) xport_a ( + .clk (rfnoc_chdr_clk ), + .rst (rfnoc_chdr_rst ), + .s_axis_tdata (c2ax_chdr_tdata ), + .s_axis_tlast (c2ax_chdr_tlast ), + .s_axis_tvalid (c2ax_chdr_tvalid), + .s_axis_tready (c2ax_chdr_tready), + .m_axis_tdata (c2ae_chdr_tdata ), + .m_axis_tlast (c2ae_chdr_tlast ), + .m_axis_tvalid (c2ae_chdr_tvalid), + .m_axis_tready (c2ae_chdr_tready), + .seqerr_prob (a_seqerr_prob ), + .rterr_prob (a_rterr_prob ), + .lossy (a_lossy_input ) + ); + + lossy_xport_model #( .CHDR_W(CHDR_W) ) xport_b ( + .clk (rfnoc_chdr_clk ), + .rst (rfnoc_chdr_rst ), + .s_axis_tdata (c2bx_chdr_tdata ), + .s_axis_tlast (c2bx_chdr_tlast ), + .s_axis_tvalid (c2bx_chdr_tvalid), + .s_axis_tready (c2bx_chdr_tready), + .m_axis_tdata (c2be_chdr_tdata ), + .m_axis_tlast (c2be_chdr_tlast ), + .m_axis_tvalid (c2be_chdr_tvalid), + .m_axis_tready (c2be_chdr_tready), + .seqerr_prob (b_seqerr_prob ), + .rterr_prob (b_rterr_prob ), + .lossy (b_lossy_input ) + ); + + // ---------------------------------------- + // BFMs and Test Models + // ---------------------------------------- + + ChdrBfm #(CHDR_W) a0_data_bfm = new(m_a0_data, s_a0_data); + ChdrBfm #(CHDR_W) b0_data_bfm = 
new(m_b0_data, s_b0_data); + ChdrBfm #(CHDR_W) a1_data_bfm = new(m_a1_data, s_a1_data); + ChdrBfm #(CHDR_W) b1_data_bfm = new(m_b1_data, s_b1_data); + ChdrBfm #(CHDR_W) tb_chdr_bfm = new(m_tb_chdr, s_tb_chdr); + + // Simple responders for AXIS-Ctrl transactions + reg a_first = 1'b1, b_first = 1'b1; + always @(posedge rfnoc_ctrl_clk) begin + if (rfnoc_ctrl_rst) begin + a_first <= 1'd1; + b_first <= 1'd1; + end else begin + if (a_ctrl_loop_tvalid & a_ctrl_loop_tready) + a_first <= a_ctrl_loop_tlast; + if (b_ctrl_loop_tvalid & b_ctrl_loop_tready) + b_first <= b_ctrl_loop_tlast; + end + end + // Respond with an ACK and the source and destination ports swapped + assign a_ctrl_out_tdata = + a_first ? {1'b1, a_ctrl_in_tdata[30:20], a_ctrl_in_tdata[9:0], a_ctrl_in_tdata[19:10]} : a_ctrl_in_tdata; + assign b_ctrl_out_tdata = + b_first ? {1'b1, b_ctrl_in_tdata[30:20], b_ctrl_in_tdata[9:0], b_ctrl_in_tdata[19:10]} : b_ctrl_in_tdata; + + // ---------------------------------------- + // Test Utilities + // ---------------------------------------- + integer cached_mgmt_seqnum = 0; + integer cached_ctrl_seqnum = 0; + integer cached_data_seqnum = 0; + + task automatic send_recv_mgmt_packet( + input chdr_header_t tx_mgmt_hdr, + input chdr_mgmt_t tx_mgmt_pl, + output chdr_header_t rx_mgmt_hdr, + output chdr_mgmt_t rx_mgmt_pl + ); + automatic timeout_t mgmt_timeout; + automatic ChdrPacket #(CHDR_W) tx_chdr = new(), rx_chdr; + tx_chdr.write_mgmt(tx_mgmt_hdr, tx_mgmt_pl); + test.start_timeout(mgmt_timeout, 2us, "Waiting for management transaction"); + if (VERBOSE) begin $write("Tx"); tx_chdr.print(); end + tb_chdr_bfm.put_chdr(tx_chdr.copy()); + tb_chdr_bfm.get_chdr(rx_chdr); + test.end_timeout(mgmt_timeout); + rx_chdr.read_mgmt(rx_mgmt_hdr, rx_mgmt_pl); + if (VERBOSE) begin $write("Rx"); rx_chdr.print(); end + endtask + + task automatic mgmt_read_err_counts( + input [15:0] dst_epid, + output [31:0] seq_err_count, + output [31:0] route_err_count, + output [31:0] data_err_count + ); + 
automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Read error counts: hop 1 is the crossbar, hop 2 the stream endpoint, hop 3 the return to the TB + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Read REG_OSTRM_SEQ_ERR_CNT + op_payload:{32'h0, sep_a.REG_OSTRM_SEQ_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd3}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Read REG_OSTRM_DATA_ERR_CNT + op_payload:{32'h0, sep_a.REG_OSTRM_DATA_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd2}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Read REG_OSTRM_ROUTE_ERR_CNT + op_payload:{32'h0, sep_a.REG_OSTRM_ROUTE_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[4] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[5] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:dst_epid, default:'0}; + + // Send the packet and extract the error counts from the read responses + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Check Errs: Mgmt header was incorrect"); + // Outputs stay X unless a response op carrying the matching register address is found below + seq_err_count = 32'hx; + route_err_count = 32'hx; + data_err_count = 32'hx; + for (int i = 1; i <= 3; i++) begin + if (rx_mgmt_pl.ops[i].op_payload[15:0] == sep_a.REG_OSTRM_SEQ_ERR_CNT) + seq_err_count = rx_mgmt_pl.ops[i].op_payload[47:16]; + else if (rx_mgmt_pl.ops[i].op_payload[15:0] == sep_a.REG_OSTRM_DATA_ERR_CNT) + data_err_count = rx_mgmt_pl.ops[i].op_payload[47:16]; + else if (rx_mgmt_pl.ops[i].op_payload[15:0] == sep_a.REG_OSTRM_ROUTE_ERR_CNT) + route_err_count = 
rx_mgmt_pl.ops[i].op_payload[47:16]; + end + endtask + + // Send num_pkts randomized CHDR control requests to dst_epid through the TB CHDR port and check + // that each response equals the request with is_ack set, src/dst ports swapped, src_epid set to + // dst_epid and the CHDR dst_epid set to EPID_TB. + task automatic send_recv_ctrl_packets( + input [15:0] dst_epid, + input [15:0] num_pkts, + input [15:0] seq_num_start + ); + for (int n = 0; n < num_pkts; n=n+1) begin + automatic timeout_t ctrl_timeout; + automatic ChdrPacket #(CHDR_W) tx_chdr = new(), rx_chdr, exp_chdr = new(); + automatic chdr_header_t chdr_hdr; + automatic chdr_ctrl_header_t ctrl_hdr, exp_ctrl_hdr; + automatic ctrl_op_word_t ctrl_op; + automatic ctrl_word_t ctrl_data[$]; + automatic chdr_word_t ctrl_ts; + + // NOTE: the loop bound is re-drawn on every iteration, so the payload length is random + // (at least one word) but not uniformly distributed over 1..15. + ctrl_data.delete(); + for (int i = 0; i < $urandom_range(15,1); i++) + ctrl_data[i] = $urandom(); + ctrl_hdr = '{ + default : '0, + src_epid : EPID_TB, + is_ack : 1'b0, + has_time : $urandom_range(1), + seq_num : seq_num_start[5:0], + num_data : ctrl_data.size(), + src_port : $urandom(), + dst_port : $urandom() + }; + ctrl_ts = $urandom(); + ctrl_op = '{ + default : '0, + op_code : ctrl_opcode_t'($urandom_range(9)), + byte_enable : $urandom_range(15), + address : $urandom() + }; + chdr_hdr = '{ + dst_epid : dst_epid, + seq_num : seq_num_start + n[15:0], + pkt_type : CHDR_CONTROL, + default : 0 + }; + tx_chdr.write_ctrl(chdr_hdr, ctrl_hdr, ctrl_op, ctrl_data, ctrl_ts); + + // Send the request and wait for the response on the TB CHDR port + test.start_timeout(ctrl_timeout, 2us, "Waiting for control transaction"); + if (VERBOSE) begin $write("Tx"); tx_chdr.print(); end + tb_chdr_bfm.put_chdr(tx_chdr.copy()); + tb_chdr_bfm.get_chdr(rx_chdr); + test.end_timeout(ctrl_timeout); + if (VERBOSE) begin $write("Rx"); rx_chdr.print(); end + + // Build the expected response from the request + exp_ctrl_hdr = ctrl_hdr; + exp_ctrl_hdr.dst_port = ctrl_hdr.src_port; + exp_ctrl_hdr.src_port = ctrl_hdr.dst_port; + exp_ctrl_hdr.src_epid = dst_epid; + exp_ctrl_hdr.is_ack = 1'b1; + + exp_chdr.write_ctrl(chdr_hdr, exp_ctrl_hdr, ctrl_op, ctrl_data, ctrl_ts); + exp_chdr.header.dst_epid = EPID_TB; + + + if (VERBOSE) begin $write("ExpRx"); exp_chdr.print(); end + + // Validate contents + `ASSERT_ERROR(exp_chdr.equal(rx_chdr), + "Received CHDR control packet was incorrect"); + 
end + endtask + + task automatic send_recv_data_packets( + input [15:0] src_epid, + input [15:0] dst_epid, + input [15:0] num_pkts, + input [15:0] seq_num_start, + input bit ignore_seq_route_errs = 0 + ); + // Pick a VC for this run randomly + logic [5:0] vc = $urandom_range(1); + fork + begin: tx_loop + for (int txi = 0; txi < num_pkts; txi=txi+1) begin + automatic timeout_t tx_timeout; + automatic ChdrPacket #(CHDR_W) tx_chdr = new(); + automatic chdr_header_t tx_hdr; + automatic chdr_word_t tx_ts; + automatic chdr_word_t tx_mdata[$]; + automatic chdr_word_t tx_data[$]; + // Fill data in the packet + tx_hdr = '{ + vc : vc, + dst_epid : dst_epid, + seq_num : seq_num_start + txi[15:0], + pkt_type : (txi%4==0) ? CHDR_DATA_WITH_TS : CHDR_DATA_NO_TS, + num_mdata : $urandom_range(5), + default : 0 + }; + tx_ts = txi; + tx_mdata.delete(); + for (int i = 0; i < tx_hdr.num_mdata; i++) + tx_mdata[i] = $urandom(); + tx_data.delete(); + for (int i = 0; i < $urandom_range((1<<MTU)-10); i++) + tx_data[i] = {txi << 16, i[15:0]}; + tx_chdr.write_raw(tx_hdr, tx_data, tx_mdata, tx_ts); + if (VERBOSE) $display("%s%0d:Tx:%0d:",(src_epid == EPID_A)?"A":"B", vc, txi, tx_chdr.sprint()); + // Send the packet + test.start_timeout(tx_timeout, 2us, "Waiting to send data packet"); + if (src_epid == EPID_A) + if (vc == 0) + a0_data_bfm.put_chdr(tx_chdr.copy()); + else + a1_data_bfm.put_chdr(tx_chdr.copy()); + else + if (vc == 0) + b0_data_bfm.put_chdr(tx_chdr.copy()); + else + b1_data_bfm.put_chdr(tx_chdr.copy()); + test.end_timeout(tx_timeout); + end + end + begin: rx_loop + for (int rxi = 0; rxi < num_pkts; rxi=rxi+1) begin + automatic timeout_t rx_timeout; + automatic ChdrPacket #(CHDR_W) rx_chdr; + // Receive a packet + test.start_timeout(rx_timeout, 2us, "Waiting to recv data packet"); + if (dst_epid == EPID_A) + if (vc == 0) + a0_data_bfm.get_chdr(rx_chdr); + else + a1_data_bfm.get_chdr(rx_chdr); + else + if (vc == 0) + b0_data_bfm.get_chdr(rx_chdr); + else + 
b1_data_bfm.get_chdr(rx_chdr); + test.end_timeout(rx_timeout); + // Validate the packet + if (VERBOSE) $display("%s:Rx%0d:%0d:",(src_epid == EPID_A)?"A":"B", vc, rxi, rx_chdr.sprint()); + `ASSERT_ERROR(ignore_seq_route_errs || rx_chdr.header.dst_epid == dst_epid, "Data Pkt: dst_epid was incorrect"); + `ASSERT_ERROR(ignore_seq_route_errs || (rx_chdr.header.seq_num == rxi + seq_num_start), "Data Pkt: seq_num was incorrect"); + if (rx_chdr.header.pkt_type == CHDR_DATA_WITH_TS) + `ASSERT_ERROR(rx_chdr.timestamp == rxi, "Data Pkt: timestamp was incorrect"); + foreach (rx_chdr.data[i]) begin + `ASSERT_ERROR(rx_chdr.data[i] == {rxi << 16, i[15:0]}, "Data Pkt: payload was incorrect"); + end + end + end + join + endtask + + task automatic set_unidir_stall_prob( + input [15:0] src_epid, + input [15:0] dst_epid, + int src_stall_prob, + int dst_stall_prob + ); + if (src_epid == EPID_A) begin + a0_data_bfm.set_master_stall_prob(src_stall_prob); + a1_data_bfm.set_master_stall_prob(src_stall_prob); + b0_data_bfm.set_slave_stall_prob (dst_stall_prob); + b1_data_bfm.set_slave_stall_prob (dst_stall_prob); + end else begin + b0_data_bfm.set_master_stall_prob(src_stall_prob); + b1_data_bfm.set_master_stall_prob(src_stall_prob); + a0_data_bfm.set_slave_stall_prob (dst_stall_prob); + a1_data_bfm.set_slave_stall_prob (dst_stall_prob); + end + endtask + + task automatic set_bidir_stall_prob( + int src_stall_prob, + int dst_stall_prob + ); + set_unidir_stall_prob(EPID_A, EPID_B, src_stall_prob, dst_stall_prob); + set_unidir_stall_prob(EPID_B, EPID_A, src_stall_prob, dst_stall_prob); + endtask + + // ---------------------------------------- + // Test Process + // ---------------------------------------- + initial begin + + // Shared Variables + // ---------------------------------------- + timeout_t timeout; + string tc_label; + bit stop_responder = 0; + logic [31:0] seq_err_count; + logic [31:0] route_err_count; + logic [31:0] data_err_count; + + a_signal_data_err = 0; + b_signal_data_err 
= 0; + a_seqerr_prob = 0; + a_rterr_prob = 0; + a_lossy_input = 0; + b_seqerr_prob = 0; + b_rterr_prob = 0; + b_lossy_input = 0; + + // Initialize + // ---------------------------------------- + test.start_tb("chdr_stream_endpoint_tb"); + + // Start the BFMs + a0_data_bfm.run(); + b0_data_bfm.run(); + a1_data_bfm.run(); + b1_data_bfm.run(); + tb_chdr_bfm.run(); + + tb_chdr_bfm.set_master_stall_prob(0); + tb_chdr_bfm.set_slave_stall_prob(0); + + // Reset + // ---------------------------------------- + rfnoc_ctrl_clk_gen.reset(); + rfnoc_chdr_clk_gen.reset(); + + test.start_test("Wait for reset"); + test.start_timeout(timeout, 1us, "Waiting for reset"); + while (rfnoc_ctrl_rst) @(posedge rfnoc_ctrl_clk); + while (rfnoc_chdr_rst) @(posedge rfnoc_chdr_clk); + test.end_timeout(timeout); + `ASSERT_ERROR(!rfnoc_chdr_rst && !rfnoc_ctrl_rst, "Reset did not deassert"); + test.end_test(); + + // Discover Topology + // ---------------------------------------- + test.start_test("Discover Topology"); + begin + automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + // *Status* We know nothing about the network. Need to discover stuff. 
+ + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Send a node info request to the crossbar + tx_mgmt_pl.header.num_hops = 2; + tx_mgmt_pl.ops.delete(); + tx_mgmt_pl.ops[0] = '{ // Hop 1: Send node info + op_payload:48'h0, op_code:MGMT_OP_INFO_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[1] = '{ // Hop 1: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:16'h0, default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Discover XB: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{2'h0, 8'd1/*ports_mgmt*/, 8'd3 /*ports*/, 10'd0 /*inst*/, 4'd1 /*type*/, DEV_ID}, + op_code:MGMT_OP_INFO_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, + "Discover XB: Mgmt response ops were incorrect"); + + // *Status* We just discovered a crossbar with 3 ports! 
+ + // Configure the crossbar routing table with our (TB) address + // then send node info request on the other two ports + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Config router to return packet to dest + op_payload:{22'h0, PORT_TB, EPID_TB}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[1] = '{ // Hop 1: Crossbar: Config router + op_payload:{38'h0, PORT_A}, op_code:MGMT_OP_SEL_DEST, ops_pending:8'd0}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Stream Endpoint: Send node info + op_payload:48'h0, op_code:MGMT_OP_INFO_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[4] = '{ // Hop 3: TB: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:16'h0, default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Discover SEP A: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{{4'd1, 6'd2, 6'd2, 2'b11} /*ext_info*/, 10'd0 /*inst*/, 4'd2 /*type*/, DEV_ID}, + op_code:MGMT_OP_INFO_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, + "Discover SEP A: Mgmt response ops were incorrect"); + + // *Status* We just discovered a stream endpoint on crossbar port 1 + + // Send node info request on the last port + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Config router + op_payload:{38'h0, PORT_B}, op_code:MGMT_OP_SEL_DEST, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Stream Endpoint: Send node info + op_payload:48'h0, op_code:MGMT_OP_INFO_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[3] = 
'{ // Hop 3: TB: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:16'h0, default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Discover SEP B: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{{4'd1, 6'd2, 6'd2, 2'b11} /*ext_info*/, 10'd1 /*inst*/, 4'd2 /*type*/, DEV_ID}, + op_code:MGMT_OP_INFO_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, + "Discover SEP B: Mgmt response ops were incorrect"); + + // *Status* We just discovered a stream endpoint on crossbar port 2 + end + test.end_test(); + + // Configure Routes to Stream Endpoints A and B + // ---------------------------------------- + test.start_test("Configure Routes"); + begin + automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Send a node info request to the crossbar + tx_mgmt_pl.header.num_hops = 2; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Config path to EP A + op_payload:{22'h0, PORT_A, EPID_A}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd2}; + tx_mgmt_pl.ops[1] = '{ // Hop 1: Crossbar: Config path to EP B + op_payload:{22'h0, PORT_B, EPID_B}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[2] = '{ // Hop 1: Request node info to make the packet come back + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:16'h0, default:'0}; + + // Send the 
packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Config Routes: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[0] == exp_mgmt_op, + "Config Routes: Mgmt response ops were incorrect"); + end + test.end_test(); + + // Configure Stream Endpoints + // ---------------------------------------- + test.start_test("Configure Stream Endpoints"); + begin + automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + logic [15:0] epids[2] = {EPID_A, EPID_B}; + foreach (epids[i]) begin + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Send a node info request to the crossbar + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Reset + op_payload:{32'b111, sep_a.REG_RESET_AND_FLUSH}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd4}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Write EPID + op_payload:{16'h0, epids[i], sep_a.REG_EPID_SELF}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd3}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Read EPID + op_payload:{32'h0, sep_a.REG_EPID_SELF}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd2}; + tx_mgmt_pl.ops[4] = '{ // Hop 2: Read EPID + op_payload:{32'h0, sep_a.REG_OSTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[5] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[6] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, 
dst_epid:epids[i], default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Config SEP: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{16'h0, epids[i], sep_a.REG_EPID_SELF}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd1}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, + "Config SEP: Mgmt response ops were incorrect"); + exp_mgmt_op = '{op_payload:{32'h0, sep_a.REG_OSTRM_CTRL_STATUS}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[2] == exp_mgmt_op, + "Config SEP: Mgmt response ops were incorrect"); + end + end + test.end_test(); + + // Setup a stream between Endpoint A and B + // ---------------------------------------- + test.start_test("Setup bidirectional stream between endpoints A and B"); + begin + automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + logic [15:0] epids[2] = {EPID_A, EPID_B}; + foreach (epids[i]) begin + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Configure FC on streams + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Write destination EPID + op_payload:{16'h0, epids[1-i], sep_a.REG_OSTRM_DST_EPID}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd7}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Configure flow ack control freq + op_payload:{32'd50, sep_a.REG_OSTRM_FC_FREQ_BYTES_LO}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd6}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Configure flow ack control freq + op_payload:{32'd0, sep_a.REG_OSTRM_FC_FREQ_BYTES_HI}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd5}; + tx_mgmt_pl.ops[4] = '{ // Hop 2: 
Configure flow ack control freq + op_payload:{32'd1000, sep_a.REG_OSTRM_FC_FREQ_PKTS}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd4}; + tx_mgmt_pl.ops[5] = '{ // Hop 2: Configure flow headroom + op_payload:{32'd0, sep_a.REG_OSTRM_FC_HEADROOM}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd3}; + tx_mgmt_pl.ops[6] = '{ // Hop 2: Configure word swapping + op_payload:{32'h44, sep_a.REG_ISTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd2}; // Swap 32-bit words, endianness + tx_mgmt_pl.ops[7] = '{ // Hop 2: Configure lossy and start config + op_payload:{32'h47, sep_a.REG_OSTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd1}; // Swap 32-bit words, endianness, lossy and reset + tx_mgmt_pl.ops[8] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[9] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:epids[i], default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + + // Wait for some time for node to flush and reset + // Typically we would poll in SW but we just wait to keep the code simple + repeat (256) @(posedge rfnoc_chdr_clk); + + // Read back FC status + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd7}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_BUFF_CAP_BYTES_LO}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd6}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_BUFF_CAP_BYTES_HI}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd5}; + tx_mgmt_pl.ops[4] 
= '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_BUFF_CAP_PKTS}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd4}; + tx_mgmt_pl.ops[5] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_SEQ_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd3}; + tx_mgmt_pl.ops[6] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_DATA_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd2}; + tx_mgmt_pl.ops[7] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_ROUTE_ERR_CNT}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[8] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[9] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:epids[i], default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Config SEP: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{32'h80000006, sep_a.REG_OSTRM_CTRL_STATUS}, // FC on, no errors and lossy + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd6}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{((1<<(MTU+1))*(CHDR_W/8)-1), sep_a.REG_OSTRM_BUFF_CAP_BYTES_LO}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd5}; + `ASSERT_ERROR(rx_mgmt_pl.ops[2] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{32'h0, sep_a.REG_OSTRM_BUFF_CAP_BYTES_HI}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd4}; + `ASSERT_ERROR(rx_mgmt_pl.ops[3] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{32'h00ffffff, sep_a.REG_OSTRM_BUFF_CAP_PKTS}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd3}; + `ASSERT_ERROR(rx_mgmt_pl.ops[4] == exp_mgmt_op, 
"Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{32'h0, sep_a.REG_OSTRM_SEQ_ERR_CNT}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd2}; + `ASSERT_ERROR(rx_mgmt_pl.ops[5] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{32'h0, sep_a.REG_OSTRM_DATA_ERR_CNT}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd1}; + `ASSERT_ERROR(rx_mgmt_pl.ops[6] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + exp_mgmt_op = '{op_payload:{32'h0, sep_a.REG_OSTRM_ROUTE_ERR_CNT}, + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[7] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + end + end + test.end_test(); + + // Control transactions to Endpoint A + // ---------------------------------------- + cached_ctrl_seqnum = 0; + for (int cfg = 0; cfg < 2; cfg++) begin + $sformat(tc_label, "Control Xact to A (%s)", (cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + tb_chdr_bfm.set_master_stall_prob(cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + tb_chdr_bfm.set_slave_stall_prob(cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + send_recv_ctrl_packets(EPID_A, NUM_PKTS_PER_TEST, cached_ctrl_seqnum); + end + test.end_test(); + cached_ctrl_seqnum += NUM_PKTS_PER_TEST; + end + + // Control transactions to Endpoint B + // ---------------------------------------- + cached_ctrl_seqnum = 0; + for (int cfg = 0; cfg < 2; cfg++) begin + $sformat(tc_label, "Control Xact to B (%s)", (cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + tb_chdr_bfm.set_master_stall_prob(cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + tb_chdr_bfm.set_slave_stall_prob(cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + send_recv_ctrl_packets(EPID_B, NUM_PKTS_PER_TEST, cached_ctrl_seqnum); + end + test.end_test(); + cached_ctrl_seqnum += NUM_PKTS_PER_TEST; + end + + // Stream data from A to B + // ---------------------------------------- + cached_data_seqnum = 0; + for (int cfg = 0; cfg < 4; cfg++) begin + automatic logic mst_cfg = 
cfg[0]; + automatic logic slv_cfg = cfg[1]; + $sformat(tc_label, "Stream Data from A to B (%s Mst, %s Slv)", + (mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + set_unidir_stall_prob(EPID_A, EPID_B, + mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB, + slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + send_recv_data_packets(EPID_A, EPID_B, NUM_PKTS_PER_TEST, cached_data_seqnum); + end + test.end_test(); + cached_data_seqnum += NUM_PKTS_PER_TEST; + end + + // Stream data from B to A + // ---------------------------------------- + cached_data_seqnum = 0; + for (int cfg = 0; cfg < 4; cfg++) begin + automatic logic mst_cfg = cfg[0]; + automatic logic slv_cfg = cfg[1]; + $sformat(tc_label, "Stream Data from B to A (%s Mst, %s Slv)", + (mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + set_unidir_stall_prob(EPID_B, EPID_A, + mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB, + slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + send_recv_data_packets(EPID_B, EPID_A, NUM_PKTS_PER_TEST, cached_data_seqnum); + end + test.end_test(); + cached_data_seqnum += NUM_PKTS_PER_TEST; + end + + // Stream data between A <=> B simultaneously + // ---------------------------------------- + for (int cfg = 0; cfg < 4; cfg++) begin + automatic logic mst_cfg = cfg[0]; + automatic logic slv_cfg = cfg[1]; + $sformat(tc_label, "Stream Data between A <=> B simultaneously (%s Mst, %s Slv)", + (mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + set_bidir_stall_prob( + mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB, + slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + fork + send_recv_data_packets(EPID_B, EPID_A, NUM_PKTS_PER_TEST, cached_data_seqnum); + send_recv_data_packets(EPID_A, EPID_B, NUM_PKTS_PER_TEST, cached_data_seqnum); + join + end + test.end_test(); + cached_data_seqnum += NUM_PKTS_PER_TEST; + end + + // Stream data and control between A <=> B simultaneously + // ---------------------------------------- + for (int 
cfg = 0; cfg < 4; cfg++) begin + automatic logic mst_cfg = cfg[0]; + automatic logic slv_cfg = cfg[1]; + $sformat(tc_label, "Stream Data and Control between A <=> B (%s Mst, %s Slv)", + (mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + tb_chdr_bfm.set_master_stall_prob(mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + tb_chdr_bfm.set_slave_stall_prob(slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + set_bidir_stall_prob( + mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB, + slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + fork + send_recv_data_packets(EPID_B, EPID_A, NUM_PKTS_PER_TEST/2, cached_data_seqnum); + send_recv_data_packets(EPID_A, EPID_B, NUM_PKTS_PER_TEST/2, cached_data_seqnum); + send_recv_ctrl_packets(EPID_A, NUM_PKTS_PER_TEST, cached_ctrl_seqnum); + join + cached_data_seqnum += NUM_PKTS_PER_TEST/2; + fork + send_recv_data_packets(EPID_B, EPID_A, NUM_PKTS_PER_TEST/2, cached_data_seqnum); + send_recv_data_packets(EPID_A, EPID_B, NUM_PKTS_PER_TEST/2, cached_data_seqnum); + send_recv_ctrl_packets(EPID_B, NUM_PKTS_PER_TEST, cached_ctrl_seqnum); + join + cached_data_seqnum += NUM_PKTS_PER_TEST/2; + cached_ctrl_seqnum += NUM_PKTS_PER_TEST; + end + test.end_test(); + end + + // Check zero sequence errors after streaming + // ---------------------------------------- + test.start_test("Check zero sequence errors after streaming"); + begin + logic [15:0] epids[2] = {EPID_A, EPID_B}; + foreach (epids[i]) begin + mgmt_read_err_counts(epids[i], seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count == 32'd0, "Check NoErrs: Incorrect seq error count"); + `ASSERT_ERROR(route_err_count == 32'd0, "Check NoErrs: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Check NoErrs: Incorrect data error count"); + end + end + test.end_test(); + + // Force sequence error + // ---------------------------------------- + test.start_test("Force sequence error"); + begin + // First sequence error + 
send_recv_data_packets(EPID_A, EPID_B, 1, cached_data_seqnum++, 1); + b_seqerr_prob = 100; // Simulate a dropped packet + send_recv_data_packets(EPID_A, EPID_B, 1, cached_data_seqnum++, 1); + b_seqerr_prob = 0; + repeat (100) @(posedge rfnoc_chdr_clk); // Wait for sequence error to reach the upstream port + mgmt_read_err_counts(EPID_A, seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count == 32'd1, "Force SeqErr: Incorrect seq error count"); + `ASSERT_ERROR(route_err_count == 32'd0, "Force SeqErr: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Force SeqErr: Incorrect data error count"); + + // Second and third sequence error + send_recv_data_packets(EPID_A, EPID_B, 1, cached_data_seqnum++, 1); + b_seqerr_prob = 100; // Simulate another dropped packet + send_recv_data_packets(EPID_A, EPID_B, 1, cached_data_seqnum++, 1); + b_seqerr_prob = 0; + repeat (100) @(posedge rfnoc_chdr_clk); // Wait for sequence error to reach the upstream port + mgmt_read_err_counts(EPID_A, seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count > 32'd1, "Force SeqErr: Incorrect seq error count"); + `ASSERT_ERROR(route_err_count == 32'd0, "Force SeqErr: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Force SeqErr: Incorrect data error count"); + end + test.end_test(); + + // Force routing error + // ---------------------------------------- + test.start_test("Force routing error"); + begin + logic [31:0] old_route_err_count; + // First routing error + send_recv_data_packets(EPID_B, EPID_A, 1, cached_data_seqnum++, 1); + a_rterr_prob = 100; // Simulate a routing error + send_recv_data_packets(EPID_B, EPID_A, 1, cached_data_seqnum++, 1); + a_rterr_prob = 0; + repeat (100) @(posedge rfnoc_chdr_clk); // Wait for routing error to reach the upstream port + mgmt_read_err_counts(EPID_B, seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count == 32'd0, "Force RouteErr 1:
Incorrect seq error count"); + `ASSERT_ERROR(route_err_count > 32'd0, "Force RouteErr 1: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Force RouteErr 1: Incorrect data error count"); + old_route_err_count = route_err_count; + + // Second routing error (the route error count must increase past the first reading) + send_recv_data_packets(EPID_B, EPID_A, 1, cached_data_seqnum++, 1); + a_rterr_prob = 100; // Simulate a routing error + send_recv_data_packets(EPID_B, EPID_A, 1, cached_data_seqnum++, 1); + a_rterr_prob = 0; + repeat (100) @(posedge rfnoc_chdr_clk); // Wait for routing error to reach the upstream port + mgmt_read_err_counts(EPID_B, seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count == 32'd0, "Force RouteErr 2: Incorrect seq error count"); + `ASSERT_ERROR(route_err_count > old_route_err_count, "Force RouteErr 2: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Force RouteErr 2: Incorrect data error count"); + end + test.end_test(); + + // Reconfigure flow control on Endpoints A and B (reset state) + // ---------------------------------------- + test.start_test("Reconfigure flow control (reset state)"); + begin + automatic chdr_header_t tx_mgmt_hdr, rx_mgmt_hdr; + automatic chdr_mgmt_t tx_mgmt_pl, rx_mgmt_pl; + automatic chdr_mgmt_op_t exp_mgmt_op; + + logic [15:0] epids[2] = {EPID_A, EPID_B}; + foreach (epids[i]) begin + // Generic management header + tx_mgmt_pl.header = '{ + default:'0, prot_ver:PROTOVER, chdr_width:translate_chdr_w(CHDR_W), src_epid:EPID_TB + }; + // Configure FC on streams + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Disable swapping + op_payload:{32'd0, sep_a.REG_ISTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_WR_REQ, ops_pending:8'd2}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Configure lossy and start config + op_payload:{32'd3, sep_a.REG_OSTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_WR_REQ,
ops_pending:8'd1}; + tx_mgmt_pl.ops[3] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[4] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:epids[i], default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + + // Wait for some time for node to flush and reset + // Typically we would poll in SW but we just wait to keep the code simple + repeat (256) @(posedge rfnoc_chdr_clk); + + // Read back FC status + tx_mgmt_pl.header.num_hops = 3; + tx_mgmt_pl.ops.delete(); + + tx_mgmt_pl.ops[0] = '{ // Hop 1: Crossbar: Nop + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_pl.ops[1] = '{ // Hop 2: Read status + op_payload:{32'h0, sep_a.REG_OSTRM_CTRL_STATUS}, op_code:MGMT_OP_CFG_RD_REQ, ops_pending:8'd1}; + tx_mgmt_pl.ops[2] = '{ // Hop 2: Stream Endpoint: Return + op_payload:48'h0, op_code:MGMT_OP_RETURN, ops_pending:8'd0}; + tx_mgmt_pl.ops[3] = '{ // Hop 3: Nop for return + op_payload:48'h0, op_code:MGMT_OP_NOP, ops_pending:8'd0}; + tx_mgmt_hdr = '{ + pkt_type:CHDR_MANAGEMENT, seq_num:cached_mgmt_seqnum++, dst_epid:epids[i], default:'0}; + + // Send the packet and check the response + send_recv_mgmt_packet(tx_mgmt_hdr, tx_mgmt_pl, rx_mgmt_hdr, rx_mgmt_pl); + `ASSERT_ERROR(rx_mgmt_pl.header.num_hops == 1, + "Config SEP: Mgmt header was incorrect"); + exp_mgmt_op = '{op_payload:{32'h80000002, sep_a.REG_OSTRM_CTRL_STATUS}, // FC on, no errors and lossy + op_code:MGMT_OP_CFG_RD_RESP, ops_pending:8'd0}; + `ASSERT_ERROR(rx_mgmt_pl.ops[1] == exp_mgmt_op, "Config SEP: Mgmt response was incorrect"); + end + end + test.end_test(); + + // Check zero errors after reinit + // ---------------------------------------- + test.start_test("Check zero errors after reinit"); + begin + logic [15:0] epids[2] 
= {EPID_A, EPID_B}; + foreach (epids[i]) begin + mgmt_read_err_counts(epids[i], seq_err_count, route_err_count, data_err_count); + `ASSERT_ERROR(seq_err_count == 32'd0, "Check NoErrs: Incorrect seq error count"); + `ASSERT_ERROR(route_err_count == 32'd0, "Check NoErrs: Incorrect route error count"); + `ASSERT_ERROR(data_err_count == 32'd0, "Check NoErrs: Incorrect data error count"); + end + end + test.end_test(); + + // Stream data between A <=> B simultaneously over a lossy link + // ---------------------------------------- + test.start_test("Stream Data between A <=> B with a lossy link"); + begin + cached_data_seqnum = 0; + set_bidir_stall_prob(FAST_STALL_PROB, SLOW_STALL_PROB); + a_lossy_input = 1; + b_lossy_input = 1; + fork + send_recv_data_packets(EPID_B, EPID_A, NUM_PKTS_PER_TEST * 10, cached_data_seqnum); + send_recv_data_packets(EPID_A, EPID_B, NUM_PKTS_PER_TEST * 10, cached_data_seqnum); + join + a_lossy_input = 0; + b_lossy_input = 0; + end + test.end_test(); + cached_data_seqnum += NUM_PKTS_PER_TEST*10; + + // Finish Up + // ---------------------------------------- + // Display final statistics and results + test.end_tb(); + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/lossy_xport_model.v b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/lossy_xport_model.v new file mode 100644 index 000000000..a93b4b305 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/chdr_stream_endpoint_tb/lossy_xport_model.v @@ -0,0 +1,66 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: lossy_xport_model +// Description: Simulation model of an AXI-Stream link carrying CHDR_W-bit words through an axi_fifo. In the first word of a packet it can increment the SeqNum field (seqerr_prob) or invert the DstEPID field (rterr_prob); each probability is compared against a random draw in 0..99. When 'lossy' is set, the input is always reported ready regardless of the FIFO. + +module lossy_xport_model #( + parameter CHDR_W = 256 +)( + input wire clk, + input wire rst, + input wire [CHDR_W-1:0] s_axis_tdata, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + output wire [CHDR_W-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready, + input wire [7:0] seqerr_prob,
+ input wire [7:0] rterr_prob, + input wire lossy +); + wire [CHDR_W-1:0] tmp_tdata; + wire tmp_tlast; + wire tmp_tvalid; + wire tmp_tready; + + reg pkt_header = 1'b1; + always @(posedge clk) begin + if (rst) begin + pkt_header <= 1'b1; + end else if (s_axis_tvalid && s_axis_tready) begin + pkt_header <= s_axis_tlast; + end + end + wire pkt_stb = (s_axis_tvalid && s_axis_tready && s_axis_tlast); + + reg force_seq_err, force_route_err; + always @(pkt_stb or seqerr_prob) begin + force_seq_err = ($urandom_range(99) < seqerr_prob); + end + always @(pkt_stb or rterr_prob) begin + force_route_err = ($urandom_range(99) < rterr_prob); + end + + wire [15:0] new_seq_num = s_axis_tdata[47:32] + 16'd1; //Increment SeqNum + wire [15:0] new_dst_epid = ~s_axis_tdata[15:0]; //Invert DstEPID + + assign tmp_tdata = !pkt_header ? s_axis_tdata : ( // Only the first word of a packet (pkt_header set) is ever altered + force_seq_err ? {s_axis_tdata[CHDR_W-1:48], new_seq_num, s_axis_tdata[31:0]} : ( // A forced sequence error takes priority over a forced route error + force_route_err ? {s_axis_tdata[CHDR_W-1:16], new_dst_epid} : s_axis_tdata)); + assign tmp_tlast = s_axis_tlast; + assign tmp_tvalid = s_axis_tvalid; + assign s_axis_tready = lossy || tmp_tready; // With lossy set, input is accepted even when out_fifo is not ready. NOTE(review): such words are presumably dropped; confirm against axi_fifo + + axi_fifo #(.WIDTH(CHDR_W+1), .SIZE(1)) out_fifo ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({tmp_tlast, tmp_tdata}), .i_tvalid(tmp_tvalid), .i_tready(tmp_tready), + .o_tdata({m_axis_tlast, m_axis_tdata}), .o_tvalid(m_axis_tvalid), .o_tready(m_axis_tready), + .space(), .occupied() + ); + +endmodule
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/Makefile b/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/Makefile new file mode 100644 index 000000000..f1f064547 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/Makefile @@ -0,0 +1,39 @@ +# +# Copyright 2019 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +SIM_TOP = ctrlport_endpoint_tb + +SIM_SRCS = \ +$(abspath ctrlport_endpoint_tb.sv) \ + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/ctrlport_endpoint_tb.sv b/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/ctrlport_endpoint_tb.sv new file mode 100644 index 000000000..492e48829 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/ctrlport_endpoint_tb/ctrlport_endpoint_tb.sv @@ -0,0 +1,502 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport_endpoint_tb +// + +`default_nettype none + + +module ctrlport_endpoint_tb; + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgChdrUtils::*; + import PkgAxisCtrlBfm::*; + + // Parameters + localparam [9:0] THIS_PORTID = 10'h17; + localparam [15:0] THIS_EPID = 16'hDEAD; + + localparam integer NUM_XACT_PER_TEST = 300; + localparam integer FAST_STALL_PROB = 0; + localparam integer SLOW_STALL_PROB = 50; + localparam bit VERBOSE = 0; + + // Clock and Reset Definition + bit rfnoc_ctrl_clk, rfnoc_ctrl_rst; + bit ctrlport_clk, ctrlport_rst; + + sim_clock_gen #(6.0) rfnoc_ctrl_clk_gen (rfnoc_ctrl_clk, rfnoc_ctrl_rst); // 166.6 MHz + sim_clock_gen #(20.0) ctrlport_clk_gen (ctrlport_clk, ctrlport_rst); // 50 MHz + + // ---------------------------------------- + // Instantiate DUT + // ---------------------------------------- + AxiStreamIf #(32) m_ctrl (rfnoc_ctrl_clk, rfnoc_ctrl_rst); + AxiStreamIf #(32) s_ctrl (rfnoc_ctrl_clk, rfnoc_ctrl_rst); + AxisCtrlBfm axis_ctrl_bfm; + + wire [31:0] axis_mst_tdata, axis_slv_tdata , axis_req_tdata , axis_resp_tdata ; + wire axis_mst_tlast, axis_slv_tlast , axis_req_tlast , axis_resp_tlast ; + wire axis_mst_tvalid, axis_slv_tvalid, axis_req_tvalid, axis_resp_tvalid; + wire axis_mst_tready, axis_slv_tready, axis_req_tready, axis_resp_tready; + + wire cp_slv_req_wr; + wire cp_slv_req_rd; + wire [19:0] 
cp_slv_req_addr; + wire [31:0] cp_slv_req_data; + wire [3:0] cp_slv_req_byte_en; + wire cp_slv_req_has_time; + wire [63:0] cp_slv_req_time; + reg cp_slv_resp_ack; + reg [1:0] cp_slv_resp_status; + reg [31:0] cp_slv_resp_data; + + logic cp_mst_req_wr; + logic cp_mst_req_rd; + logic [19:0] cp_mst_req_addr; + logic [9:0] cp_mst_req_portid; + logic [15:0] cp_mst_req_rem_epid; + logic [9:0] cp_mst_req_rem_portid; + logic [31:0] cp_mst_req_data; + logic [3:0] cp_mst_req_byte_en; + logic cp_mst_req_has_time; + logic [63:0] cp_mst_req_time; + wire cp_mst_resp_ack; + wire [1:0] cp_mst_resp_status; + wire [31:0] cp_mst_resp_data; + + ctrlport_endpoint #( + .THIS_PORTID (THIS_PORTID), + .SYNC_CLKS (0), + .AXIS_CTRL_MST_EN (1), + .AXIS_CTRL_SLV_EN (1), + .SLAVE_FIFO_SIZE (5) + ) dut ( + .rfnoc_ctrl_clk (rfnoc_ctrl_clk ), + .rfnoc_ctrl_rst (rfnoc_ctrl_rst ), + .ctrlport_clk (ctrlport_clk ), + .ctrlport_rst (ctrlport_rst ), + .s_rfnoc_ctrl_tdata (axis_mst_tdata ), + .s_rfnoc_ctrl_tlast (axis_mst_tlast ), + .s_rfnoc_ctrl_tvalid (axis_mst_tvalid ), + .s_rfnoc_ctrl_tready (axis_mst_tready ), + .m_rfnoc_ctrl_tdata (axis_slv_tdata ), + .m_rfnoc_ctrl_tlast (axis_slv_tlast ), + .m_rfnoc_ctrl_tvalid (axis_slv_tvalid ), + .m_rfnoc_ctrl_tready (axis_slv_tready ), + .m_ctrlport_req_wr (cp_slv_req_wr ), + .m_ctrlport_req_rd (cp_slv_req_rd ), + .m_ctrlport_req_addr (cp_slv_req_addr ), + .m_ctrlport_req_data (cp_slv_req_data ), + .m_ctrlport_req_byte_en (cp_slv_req_byte_en ), + .m_ctrlport_req_has_time (cp_slv_req_has_time ), + .m_ctrlport_req_time (cp_slv_req_time ), + .m_ctrlport_resp_ack (cp_slv_resp_ack ), + .m_ctrlport_resp_status (cp_slv_resp_status ), + .m_ctrlport_resp_data (cp_slv_resp_data ), + .s_ctrlport_req_wr (cp_mst_req_wr ), + .s_ctrlport_req_rd (cp_mst_req_rd ), + .s_ctrlport_req_addr (cp_mst_req_addr ), + .s_ctrlport_req_portid (cp_mst_req_portid ), + .s_ctrlport_req_rem_epid (cp_mst_req_rem_epid ), + .s_ctrlport_req_rem_portid(cp_mst_req_rem_portid), + .s_ctrlport_req_data 
(cp_mst_req_data ), + .s_ctrlport_req_byte_en (cp_mst_req_byte_en ), + .s_ctrlport_req_has_time (cp_mst_req_has_time ), + .s_ctrlport_req_time (cp_mst_req_time ), + .s_ctrlport_resp_ack (cp_mst_resp_ack ), + .s_ctrlport_resp_status (cp_mst_resp_status ), + .s_ctrlport_resp_data (cp_mst_resp_data ) + ); + + // ---------------------------------------- + // Test Helpers + // ---------------------------------------- + + // Add a MUX and DEMUX on the ctrlport logic to loop responses + // back into the endpoint and to allow external access from the + // master and slave BFM. + axi_mux #( + .WIDTH(32), .SIZE(2), .PRIO(0), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(0) + ) mux_i ( + .clk(rfnoc_ctrl_clk), .reset(rfnoc_ctrl_rst), .clear(1'b0), + .i_tdata ({m_ctrl.slave.tdata , axis_resp_tdata }), + .i_tlast ({m_ctrl.slave.tlast , axis_resp_tlast }), + .i_tvalid({m_ctrl.slave.tvalid, axis_resp_tvalid}), + .i_tready({m_ctrl.slave.tready, axis_resp_tready}), + .o_tdata (axis_mst_tdata ), + .o_tlast (axis_mst_tlast ), + .o_tvalid(axis_mst_tvalid), + .o_tready(axis_mst_tready) + ); + + wire [31:0] in_hdr; + axi_demux #( + .WIDTH(32), .SIZE(2), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(0) + ) demux_i ( + .clk(rfnoc_ctrl_clk), .reset(rfnoc_ctrl_rst), .clear(1'b0), + .header(in_hdr), .dest(in_hdr[31]), + .i_tdata (axis_slv_tdata ), + .i_tlast (axis_slv_tlast ), + .i_tvalid(axis_slv_tvalid), + .i_tready(axis_slv_tready), + .o_tdata ({s_ctrl.master.tdata , axis_req_tdata }), + .o_tlast ({s_ctrl.master.tlast , axis_req_tlast }), + .o_tvalid({s_ctrl.master.tvalid, axis_req_tvalid}), + .o_tready({s_ctrl.master.tready, axis_req_tready}) + ); + + // -------------------------- + // [Dummy Control Port Slave] + // Slave Model: + // - Respond in 1 clock cycle + // - Status = Upper 2 bits of the address + // - Response Data = 0xFEED and Negated bottom 16 bits of addr + always @(posedge ctrlport_clk) begin + if (ctrlport_rst) begin + cp_slv_resp_ack <= 1'b0; + end else begin + cp_slv_resp_ack <= cp_slv_req_wr 
| cp_slv_req_rd; + cp_slv_resp_status <= cp_slv_req_addr[19:18]; + cp_slv_resp_data <= {16'hFEED, ~cp_slv_req_addr[15:0]}; + end + end + // -------------------------- + + // ---------------------------- + // [Dummy AXIS-Ctrl Port Slave] + // Slave Model: + // - Response = Request but with the ACK bit set + // - Status = Upper 2 bits of the address + // - Response Data = Request Data + reg [4:0] line_num = 5'd0; + reg pkt_has_time = 1'b0; + wire pkt_hdr_line = (line_num == 5'd0); + wire pkt_op_line = pkt_has_time ? (line_num == 5'd4) : (line_num == 5'd2); + always @(posedge rfnoc_ctrl_clk) begin + if (rfnoc_ctrl_rst) begin + line_num <= 5'd0; + pkt_has_time <= 1'b0; + end else if (axis_req_tvalid & axis_resp_tready) begin + if (pkt_hdr_line) + pkt_has_time <= axis_req_tdata[30]; + line_num <= axis_req_tlast ? 5'd0 : (line_num + 1); + end + end + assign axis_resp_tdata = + pkt_hdr_line ? {1'b1, axis_req_tdata[30:0]} : ( + pkt_op_line ? {axis_req_tdata[19:18], axis_req_tdata[29:0]} : + axis_req_tdata); + assign axis_resp_tlast = axis_req_tlast; + assign axis_resp_tvalid = axis_req_tvalid; + assign axis_req_tready = axis_resp_tready; + // ---------------------------- + + // Task to send a ctrlport request and receive a response + task ctrlport_transact( + input wr, + input rd, + input [19:0] addr, + input [9:0] portid, + input [15:0] rem_epid, + input [9:0] rem_portid, + input [31:0] data, + input [3:0] byte_en, + input has_time, + input [63:0] timestamp, + output [1:0] resp_status, + output [31:0] resp_data + ); + if (rd | wr) begin + cp_mst_req_wr <= wr; + cp_mst_req_rd <= rd; + cp_mst_req_addr <= addr; + cp_mst_req_portid <= portid; + cp_mst_req_rem_epid <= rem_epid; + cp_mst_req_rem_portid <= rem_portid; + cp_mst_req_data <= data; + cp_mst_req_byte_en <= byte_en; + cp_mst_req_has_time <= has_time; + cp_mst_req_time <= timestamp; + @(posedge ctrlport_clk); + cp_mst_req_wr <= 0; + cp_mst_req_rd <= 0; + while (~cp_mst_resp_ack) @(posedge ctrlport_clk); + resp_status = 
cp_mst_resp_status; + resp_data = cp_mst_resp_data; + + // Validate contents + if (VERBOSE) begin + $display("%s(addr=%0x, data=%0x, portid=%0x, has_time=%0b) = %0x (Status = %0d)", + (rd&wr)?"WRRD":(rd?"RD":"WR"), addr, data, portid, has_time, resp_data, resp_status); + end + `ASSERT_ERROR(cp_mst_resp_status == addr[19:18], + "Received Ctrlport response had the wrong status"); + `ASSERT_ERROR(cp_mst_resp_data == data, + "Received Ctrlport response had the wrong data"); + end + endtask + + // Task to send a AxisCtrl request and receive a response + logic [5:0] cached_seq_num = 0; + task axis_ctrl_transact( + input [3:0] opcode, + input [19:0] addr, + input [9:0] portid, + input [15:0] rem_epid, + input [9:0] rem_portid, + input [31:0] data[$], + input [3:0] byte_en, + input has_time, + input [63:0] timestamp, + output [1:0] resp_status, + output [31:0] resp_data + ); + automatic AxisCtrlPacket tx_pkt, rx_pkt = null, exp_pkt = null; + automatic axis_ctrl_header_t header; + automatic ctrl_op_word_t op_word; + automatic ctrl_status_t exp_status; + automatic ctrl_word_t exp_data0; + + // Opcode specific logic + case (ctrl_opcode_t'(opcode)) + CTRL_OP_SLEEP: begin + // data[0] = cycles of sleep so limit its value + if (data.size() > 0) data[0][31:5] = 0; + exp_status = CTRL_STS_OKAY; + exp_data0 = data[0]; + end + CTRL_OP_WRITE_READ: begin + exp_status = ctrl_status_t'(addr[19:18]); + exp_data0 = {16'hFEED, ~addr[15:0]}; + end + CTRL_OP_WRITE: begin + exp_status = ctrl_status_t'(addr[19:18]); + exp_data0 = data[0]; + end + CTRL_OP_READ: begin + exp_status = ctrl_status_t'(addr[19:18]); + exp_data0 = {16'hFEED, ~addr[15:0]}; + end + default: begin + exp_status = CTRL_STS_CMDERR; + exp_data0 = data[0]; + end + endcase + + // Build TX packet + tx_pkt = new(); + header = '{ + default : '0, + rem_dst_port : rem_portid, + rem_dst_epid : rem_epid, + is_ack : 1'b0, + has_time : has_time, + seq_num : cached_seq_num, + num_data : data.size(), + src_port : THIS_PORTID, + dst_port 
: portid + }; + op_word = '{ + default : '0, + op_code : ctrl_opcode_t'(opcode), + byte_enable : byte_en, + address : addr + }; + tx_pkt.write_ctrl(header, op_word, data, timestamp); + + // Build expected packet (NULL if data vector is empty) + if (data.size() > 0) begin + exp_pkt = tx_pkt.copy(); + exp_pkt.header.is_ack = 1'b1; + exp_pkt.op_word.status = exp_status; + exp_pkt.data[0] = exp_data0; + end + + if (VERBOSE) $display("*******************"); + fork + // Send the packet + begin + axis_ctrl_bfm.put_ctrl(tx_pkt.copy()); + if (VERBOSE) begin $display("[TRANSMITTED]"); tx_pkt.print(); end + end + // Wait for response only if we are expecting one + if (exp_pkt != null) begin + axis_ctrl_bfm.get_ctrl(rx_pkt); + if (VERBOSE) begin $display("[RECEIVED]"); rx_pkt.print(); end + end + join + cached_seq_num = cached_seq_num + 1; + + // Validate contents + if (exp_pkt != null) begin + if (VERBOSE) begin $display("[EXPECTED]"); exp_pkt.print(); end + `ASSERT_ERROR(exp_pkt.equal(rx_pkt), + "Received AXIS-Ctrl packet was incorrect"); + end + endtask + + // ---------------------------------------- + // Test Process + // ---------------------------------------- + initial begin + // Shared Variables + // ---------------------------------------- + timeout_t timeout; + string tc_label; + logic [31:0] data_vtr[$]; + logic [1:0] resp_status; + logic [31:0] resp_data; + + // Initialize + // ---------------------------------------- + test.start_tb("ctrlport_endpoint_tb"); + + // Start the BFMs + axis_ctrl_bfm = new(m_ctrl, s_ctrl); + axis_ctrl_bfm.run(); + + // Reset + // ---------------------------------------- + rfnoc_ctrl_clk_gen.reset(); + ctrlport_clk_gen.reset(); + + test.start_test("Wait for reset"); + test.start_timeout(timeout, 1us, "Waiting for reset"); + while (rfnoc_ctrl_rst) @(posedge rfnoc_ctrl_clk); + while (ctrlport_rst) @(posedge ctrlport_clk); + test.end_timeout(timeout); + `ASSERT_ERROR(!ctrlport_rst && !rfnoc_ctrl_rst, "Reset did not deassert"); + 
test.end_test(); + + // AXIS-Ctrl Slave Test + // ---------------------------------------- + // Send AXIS-Ctrl packets to the DUT and expect AXIS-Ctrl + // responses. There is a ctrlport slave implemented above + for (int cfg = 0; cfg < 4; cfg++) begin + automatic logic mst_cfg = cfg[0]; + automatic logic slv_cfg = cfg[1]; + $sformat(tc_label, + "AXIS-Ctrl Slave (%s Master, %s Slave)", + (mst_cfg?"Slow":"Fast"), (slv_cfg?"Slow":"Fast")); + test.start_test(tc_label); + begin + // Set bus stall probabilities based on configuration + axis_ctrl_bfm.set_master_stall_prob(mst_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + axis_ctrl_bfm.set_slave_stall_prob(slv_cfg?SLOW_STALL_PROB:FAST_STALL_PROB); + // Test multiple transactions + for (int n = 0; n < NUM_XACT_PER_TEST; n++) begin + // Generate random data for the payload + // It is illegal in the protocol to have a zero + // data length but we test it here to ensure no lockups + data_vtr.delete(); + for (int i = 0; i < $urandom_range(15); i++) + data_vtr[i] = $urandom(); + // Perform transaction + test.start_timeout(timeout, 10us, "Waiting for AXIS-Ctrl transaction"); + axis_ctrl_transact( + $urandom_range(5), // opcode + $urandom(), // addr + THIS_PORTID, // portid + $urandom(), $urandom(), // rem_epid, rem_portid + data_vtr, + $urandom_range(15), // byte_en + $urandom_range(1), // has_time + {$urandom(), $urandom()}, // timestamp + resp_status, + resp_data + ); + test.end_timeout(timeout); + end + end + test.end_test(); + end + + // AXIS-Ctrl Master Test + // ---------------------------------------- + // Send Ctrlport packets to the DUT and expect Ctrlport + // responses. 
There is a AXIS-Ctrl slave implemented above + test.start_test("AXIS-Ctrl Master"); + begin + // Test multiple transactions + for (int n = 0; n < NUM_XACT_PER_TEST * 4; n++) begin + test.start_timeout(timeout, 10us, "Waiting for Ctrlport transaction"); + ctrlport_transact( + $urandom_range(1), $urandom_range(1), // wr and rd + $urandom(), // addr + THIS_PORTID, // portid + $urandom(), $urandom(), // rem_epid, rem_portid + $urandom(), // data + $urandom_range(15), // byte_en + $urandom_range(1), // has_time + {$urandom(), $urandom()}, // timestamp + resp_status, + resp_data + ); + + test.end_timeout(timeout); + end + end + test.end_test(); + + // AXIS-Ctrl Master+Slave Test + // ---------------------------------------- + test.start_test("AXIS-Ctrl Master + Slave Simultaneously"); + begin + axis_ctrl_bfm.set_master_stall_prob(FAST_STALL_PROB); + axis_ctrl_bfm.set_slave_stall_prob(FAST_STALL_PROB); + test.start_timeout(timeout, 10us * NUM_XACT_PER_TEST, "Waiting for test case"); + fork + for (int n = 0; n < NUM_XACT_PER_TEST; n++) begin + // Generate random data for the payload + // It is illegal in the protocol to have a zero + // data length but we test it here to ensure no lockups + data_vtr.delete(); + for (int i = 0; i < $urandom_range(15); i++) + data_vtr[i] = $urandom(); + // Perform transaction + axis_ctrl_transact( + $urandom_range(5), // opcode + $urandom(), // addr + THIS_PORTID, // portid + $urandom(), $urandom(), // rem_epid, rem_portid + data_vtr, + $urandom_range(15), // byte_en + $urandom_range(1), // has_time + {$urandom(), $urandom()}, // timestamp + resp_status, + resp_data + ); + end + for (int n = 0; n < NUM_XACT_PER_TEST; n++) begin + ctrlport_transact( + $urandom_range(1), $urandom_range(1), // wr and rd + $urandom(), // addr + THIS_PORTID, // portid + $urandom(), $urandom(), // rem_epid, rem_portid + $urandom(), // data + $urandom_range(15), // byte_en + $urandom_range(1), // has_time + {$urandom(), $urandom()}, // timestamp + resp_status, + 
resp_data + ); + end + join + test.end_timeout(timeout); + end + test.end_test(); + + // Finish Up + // ---------------------------------------- + // Display final statistics and results + test.end_tb(); + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/split_complex.v b/fpga/usrp3/lib/rfnoc/split_complex.v new file mode 100644 index 000000000..b8b755c10 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/split_complex.v @@ -0,0 +1,30 @@ + +// Copyright 2014, Ettus Research +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +// Module to split a complex stream to I and Q outputs. NOTE -- ONLY works when you can guarantee downstream paths match! + +module split_complex + #(parameter WIDTH=16) + (input [WIDTH*2-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] oi_tdata, output oi_tlast, output oi_tvalid, input oi_tready, + output [WIDTH-1:0] oq_tdata, output oq_tlast, output oq_tvalid, input oq_tready, + output error); + + assign oi_tdata = i_tdata[WIDTH*2-1:WIDTH]; + assign oq_tdata = i_tdata[WIDTH-1:0]; + + assign oi_tlast = i_tlast; + assign oq_tlast = i_tlast; + + assign oi_tvalid = i_tvalid; + assign oq_tvalid = i_tvalid; + + assign i_tready = oi_tready; + + assign error = oi_tready ^ oq_tready; + +endmodule // split_complex + diff --git a/fpga/usrp3/lib/rfnoc/split_stream.v b/fpga/usrp3/lib/rfnoc/split_stream.v new file mode 100644 index 000000000..ae24ca1f8 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/split_stream.v @@ -0,0 +1,30 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module split_stream + #(parameter WIDTH=16, + parameter ACTIVE_MASK=4'b1111) + (input clk, input reset, input clear, // These are not used in plain split_stream + input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] o0_tdata, output 
o0_tlast, output o0_tvalid, input o0_tready, + output [WIDTH-1:0] o1_tdata, output o1_tlast, output o1_tvalid, input o1_tready, + output [WIDTH-1:0] o2_tdata, output o2_tlast, output o2_tvalid, input o2_tready, + output [WIDTH-1:0] o3_tdata, output o3_tlast, output o3_tvalid, input o3_tready); + + assign { o0_tlast, o0_tdata } = { i_tlast, i_tdata }; + assign { o1_tlast, o1_tdata } = { i_tlast, i_tdata }; + assign { o2_tlast, o2_tdata } = { i_tlast, i_tdata }; + assign { o3_tlast, o3_tdata } = { i_tlast, i_tdata }; + + // NOTE -- this violates the AXI spec because tvalids are dependent on treadys. + // It will be ok most of the time, but muxes and demuxes will need a fifo in + // the middle to avoid deadlock + + assign i_tready = ~|(~{o3_tready,o2_tready,o1_tready,o0_tready} & ACTIVE_MASK); + assign { o3_tvalid, o2_tvalid, o1_tvalid, o0_tvalid } = {4{i_tready & i_tvalid}}; + +endmodule // split_stream diff --git a/fpga/usrp3/lib/rfnoc/split_stream_fifo.v b/fpga/usrp3/lib/rfnoc/split_stream_fifo.v new file mode 100644 index 000000000..d99ec115a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/split_stream_fifo.v @@ -0,0 +1,59 @@ +// +// Copyright 2014 Ettus Research LLC +// Copyright 2018 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module split_stream_fifo + #(parameter WIDTH=16, + parameter FIFO_SIZE=5, + parameter ACTIVE_MASK=4'b1111) + (input clk, input reset, input clear, + input [WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, + output [WIDTH-1:0] o0_tdata, output o0_tlast, output o0_tvalid, input o0_tready, + output [WIDTH-1:0] o1_tdata, output o1_tlast, output o1_tvalid, input o1_tready, + output [WIDTH-1:0] o2_tdata, output o2_tlast, output o2_tvalid, input o2_tready, + output [WIDTH-1:0] o3_tdata, output o3_tlast, output o3_tvalid, input o3_tready); + + wire [WIDTH-1:0] o0_tdata_int, o1_tdata_int, o2_tdata_int, o3_tdata_int; + wire o0_tlast_int, o1_tlast_int, o2_tlast_int, 
o3_tlast_int; + wire o0_tvalid_int, o1_tvalid_int, o2_tvalid_int, o3_tvalid_int; + wire o0_tready_int, o1_tready_int, o2_tready_int, o3_tready_int; + + split_stream #(.WIDTH(WIDTH), .ACTIVE_MASK(ACTIVE_MASK)) split_stream + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata(i_tdata), .i_tlast(i_tlast), .i_tvalid(i_tvalid), .i_tready(i_tready), + .o0_tdata(o0_tdata_int), .o0_tlast(o0_tlast_int), .o0_tvalid(o0_tvalid_int), .o0_tready(o0_tready_int), + .o1_tdata(o1_tdata_int), .o1_tlast(o1_tlast_int), .o1_tvalid(o1_tvalid_int), .o1_tready(o1_tready_int), + .o2_tdata(o2_tdata_int), .o2_tlast(o2_tlast_int), .o2_tvalid(o2_tvalid_int), .o2_tready(o2_tready_int), + .o3_tdata(o3_tdata_int), .o3_tlast(o3_tlast_int), .o3_tvalid(o3_tvalid_int), .o3_tready(o3_tready_int)); + + generate + if(ACTIVE_MASK[0]) + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(FIFO_SIZE)) axi_fifo0 + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({o0_tlast_int, o0_tdata_int}), .i_tvalid(o0_tvalid_int), .i_tready(o0_tready_int), + .o_tdata({o0_tlast, o0_tdata}), .o_tvalid(o0_tvalid), .o_tready(o0_tready), + .occupied(), .space()); + if(ACTIVE_MASK[1]) + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(FIFO_SIZE)) axi_fifo1 + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({o1_tlast_int, o1_tdata_int}), .i_tvalid(o1_tvalid_int), .i_tready(o1_tready_int), + .o_tdata({o1_tlast, o1_tdata}), .o_tvalid(o1_tvalid), .o_tready(o1_tready), + .occupied(), .space()); + if(ACTIVE_MASK[2]) + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(FIFO_SIZE)) axi_fifo2 + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({o2_tlast_int, o2_tdata_int}), .i_tvalid(o2_tvalid_int), .i_tready(o2_tready_int), + .o_tdata({o2_tlast, o2_tdata}), .o_tvalid(o2_tvalid), .o_tready(o2_tready), + .occupied(), .space()); + if(ACTIVE_MASK[3]) + axi_fifo #(.WIDTH(WIDTH+1), .SIZE(FIFO_SIZE)) axi_fifo3 + (.clk(clk), .reset(reset), .clear(clear), + .i_tdata({o3_tlast_int, o3_tdata_int}), .i_tvalid(o3_tvalid_int), .i_tready(o3_tready_int), + .o_tdata({o3_tlast, o3_tdata}), 
.o_tvalid(o3_tvalid), .o_tready(o3_tready), + .occupied(), .space()); + endgenerate + +endmodule // split_stream_fifo diff --git a/fpga/usrp3/lib/rfnoc/utils/Makefile.srcs b/fpga/usrp3/lib/rfnoc/utils/Makefile.srcs new file mode 100644 index 000000000..c8fb9648f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/Makefile.srcs @@ -0,0 +1,29 @@ +# +# Copyright 2018 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Utility Sources +################################################## +RFNOC_UTIL_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/utils/, \ +chdr_trim_payload.v \ +chdr_pad_packet.v \ +context_handler_sync.v \ +context_builder.v \ +context_parser.v \ +ctrlport_timer.v \ +ctrlport_combiner.v \ +ctrlport_decoder.v \ +ctrlport_decoder_param.v \ +ctrlport_splitter.v \ +ctrlport_resp_combine.v \ +ctrlport_clk_cross.v \ +ctrlport_reg_rw.v \ +ctrlport_reg_ro.v \ +ctrlport_to_settings_bus.v \ +noc_shell_generic_ctrlport_pyld_chdr.v \ +timekeeper.v \ +ctrlport_terminator.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/utils/axis_ctrlport_reg.v b/fpga/usrp3/lib/rfnoc/utils/axis_ctrlport_reg.v new file mode 100644 index 000000000..52a372a62 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/axis_ctrlport_reg.v @@ -0,0 +1,143 @@
//
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_ctrlport_reg
//
// Description:
//
//   Converts control port writes to an AXI-stream data stream. Flow control is
//   handled by pushing back on the ctrlport interface (i.e., by not
//   acknowledging ctrlport writes until the AXI-stream data is accepted).
//
// Parameters:
//
//   ADDR           : Writes to this address make up the payload of the
//                    packet.
//
//   USE_ADDR_LAST  : Indicates whether the ADDR_LAST register is generated.
//                    Set to 1 if TLAST is needed.
//
//   ADDR_LAST      : A write to this address will complete the packet (output
//                    the last word with TLAST asserted).
//
//   DWIDTH         : Width of the AXI-stream data bus
//
//   USE_FIFO       : Indicate if you want a FIFO to be inserted before the output.
//
//   FIFm_SIZE      : The FIFO depth will be 2^FIFm_SIZE. (The parameter name is
//                    kept unchanged so existing instantiations still elaborate.)
//
//   DATA_AT_RESET  : Value of TDATA at reset.
//
//   VALID_AT_RESET : State of TVALID at reset.
//
//   LAST_AT_RESET  : State of TLAST at reset.
//

module axis_ctrlport_reg #(
  parameter ADDR           = 0,
  parameter USE_ADDR_LAST  = 0,
  parameter ADDR_LAST      = ADDR+1,
  parameter DWIDTH         = 32,
  parameter USE_FIFO       = 0,
  parameter FIFm_SIZE      = 5,
  parameter DATA_AT_RESET  = 0,
  parameter VALID_AT_RESET = 0,
  parameter LAST_AT_RESET  = 0
) (
  input clk,
  input reset,

  //---------------------------------------------------------------------------
  // Control Port
  //---------------------------------------------------------------------------

  // Control Port Slave (Request)
  input  wire        s_ctrlport_req_wr,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,

  // Control Port Slave (Response)
  output reg         s_ctrlport_resp_ack,

  //---------------------------------------------------------------------------
  // AXI-Stream Master
  //---------------------------------------------------------------------------

  // AXI-Stream Output
  output [DWIDTH-1:0] m_tdata,
  output              m_tlast,
  output              m_tvalid,
  input               m_tready
);

  // Holding register for the word currently offered to the AXI-Stream side.
  // The power-up values must match the values applied by the synchronous
  // reset below: TLAST takes LAST_AT_RESET and TVALID takes VALID_AT_RESET.
  reg [DWIDTH-1:0] m_tdata_int  = DATA_AT_RESET;
  reg              m_tlast_int  = LAST_AT_RESET;
  reg              m_tvalid_int = VALID_AT_RESET;
  wire             m_tready_int;


  //---------------------------------------------------------------------------
  // CtrlPort to AXI-Stream Logic
  //---------------------------------------------------------------------------

  always @(posedge clk) begin
    if (reset) begin
      m_tdata_int         <= DATA_AT_RESET;
      m_tvalid_int        <= VALID_AT_RESET;
      m_tlast_int         <= LAST_AT_RESET;
      s_ctrlport_resp_ack <= 1'b0;
    end else begin
      // The ctrlport write is acknowledged only once the held word has been
      // accepted downstream; the ack is a single-cycle pulse.
      if (m_tvalid_int & m_tready_int) begin
        s_ctrlport_resp_ack <= 1'b1;
        m_tvalid_int        <= 1'b0;
        m_tlast_int         <= 1'b0;
      end else begin
        s_ctrlport_resp_ack <= 1'b0;
      end

      // A write to ADDR loads a payload word; a write to ADDR_LAST (when
      // enabled) loads the final word of the packet. These assignments come
      // last, so they override the clear above if both occur in one cycle.
      if (s_ctrlport_req_wr) begin
        if (s_ctrlport_req_addr == ADDR) begin
          m_tdata_int  <= s_ctrlport_req_data;
          m_tvalid_int <= 1'b1;
          m_tlast_int  <= 1'b0;
        end else if (USE_ADDR_LAST && ADDR_LAST == s_ctrlport_req_addr) begin
          m_tdata_int  <= s_ctrlport_req_data;
          m_tvalid_int <= 1'b1;
          m_tlast_int  <= 1'b1;
        end
      end
    end
  end


  //---------------------------------------------------------------------------
  // Output FIFO
  //---------------------------------------------------------------------------

  generate
    if (USE_FIFO) begin : gen_fifo
      // TLAST travels through the FIFO as the MSB of the data word. The data
      // width parameter of axi_fifo is WIDTH, as used by the other axi_fifo
      // instances in this library.
      axi_fifo #(
        .WIDTH (DWIDTH+1),
        .SIZE  (FIFm_SIZE)
      ) axi_fifo (
        .clk      (clk),
        .reset    (reset),
        .clear    (1'b0),
        .i_tdata  ({m_tlast_int, m_tdata_int}),
        .i_tvalid (m_tvalid_int),
        .i_tready (m_tready_int),
        .o_tdata  ({m_tlast, m_tdata}),
        .o_tvalid (m_tvalid),
        .o_tready (m_tready),
        .space    (),
        .occupied ()
      );
    end else begin : nm_gen_fifo
      // No FIFO: drive the output directly from the holding register.
      assign m_tdata      = m_tdata_int;
      assign m_tlast      = m_tlast_int;
      assign m_tvalid     = m_tvalid_int;
      assign m_tready_int = m_tready;
    end
  endgenerate

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/chdr_pad_packet.v b/fpga/usrp3/lib/rfnoc/utils/chdr_pad_packet.v new file mode 100644 index 000000000..14d63fe74 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/chdr_pad_packet.v @@ -0,0 +1,132 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_pad_packet +// Description: +// This module pads extra data on the AXI-Stream bus +// to the requested packet size. This module is for +// creating len-sized packets, for DMA engines that +// do not support partial transfers. 
+// +// Parameters: +// - CHDR_W: Width of the CHDR tdata bus in bits +// +// Signals: +// - s_axis_* : Input AXI-Stream CHDR bus +// - m_axis_* : Output AXI-Stream CHDR bus +// - len : Requested number of CHDR_W lines in the packet (must be > 1) + +`default_nettype none +module chdr_pad_packet #( + parameter CHDR_W = 256 +)( + input wire clk, + input wire rst, + input wire [15:0] len, + input wire [CHDR_W-1:0] s_axis_tdata, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output reg s_axis_tready, + output wire [CHDR_W-1:0] m_axis_tdata, + output reg m_axis_tlast, + output reg m_axis_tvalid, + input wire m_axis_tready +); + + localparam [1:0] ST_HEADER = 2'd0; + localparam [1:0] ST_BODY = 2'd1; + localparam [1:0] ST_PAD = 2'd2; + localparam [1:0] ST_DROP = 2'd3; + + reg [1:0] state; + reg [15:0] lines_left; + + always @(posedge clk) begin + if (rst || (len <= 16'd1)) begin + state <= ST_HEADER; + end else begin + case(state) + ST_HEADER: begin + lines_left <= len - 16'd1; + if (s_axis_tvalid && m_axis_tready) begin + if (!s_axis_tlast) begin + // Packet is more than one line and length not reached + state <= ST_BODY; + end else begin + // Packet is only one line and length not reached + state <= ST_PAD; + end + end + end + ST_BODY: begin + if (s_axis_tvalid && m_axis_tready) begin + lines_left <= lines_left - 16'd1; + if (s_axis_tlast && (lines_left == 16'd1)) begin + // End of input and reached length + state <= ST_HEADER; + end else if (s_axis_tlast && (lines_left != 16'd1)) begin + // End of input, but length not reached + state <= ST_PAD; + end else if (!s_axis_tlast && (lines_left == 16'd1)) begin + // Reached length, but input continues... 
+ state <= ST_DROP; + end + end + end + ST_PAD: begin + if (m_axis_tready) begin + lines_left <= lines_left - 16'd1; + if (lines_left == 16'd1) begin + state <= ST_HEADER; + end + end + end + ST_DROP: begin + if (s_axis_tvalid && s_axis_tlast) begin + state <= ST_HEADER; + end + end + default: begin + // We should never get here + state <= ST_HEADER; + end + endcase + end + end + + assign m_axis_tdata = s_axis_tdata; + + always @(*) begin + case(state) + ST_HEADER: begin + if (len <= 16'd1) begin + s_axis_tready <= 1'b0; + m_axis_tvalid <= 1'b0; + end else begin + s_axis_tready <= m_axis_tready; + m_axis_tvalid <= s_axis_tvalid; + end + m_axis_tlast <= 1'b0; + end + ST_BODY: begin + s_axis_tready <= m_axis_tready; + m_axis_tvalid <= s_axis_tvalid; + m_axis_tlast <= (lines_left == 16'd1); + end + ST_PAD: begin + s_axis_tready <= 1'b0; + m_axis_tvalid <= 1'b1; + m_axis_tlast <= (lines_left == 16'd1); + end + ST_DROP: begin + s_axis_tready <= 1'b1; + m_axis_tvalid <= 1'b0; + m_axis_tlast <= 1'b0; + end + endcase + end + +endmodule // chdr_pad_packet +`default_nettype wire diff --git a/fpga/usrp3/lib/rfnoc/utils/chdr_trim_payload.v b/fpga/usrp3/lib/rfnoc/utils/chdr_trim_payload.v new file mode 100644 index 000000000..ffeec1437 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/chdr_trim_payload.v @@ -0,0 +1,97 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_trim_payload
// Description:
//   This module trims any extra data on the AXI-Stream
//   bus to the CHDR payload size. This ensures that the
//   line with tlast is the actual last line of the packet
//
// Parameters:
//   - CHDR_W: Width of the CHDR tdata bus in bits
//   - USER_W: Width of the tuser bus in bits
//
// Signals:
//   - s_axis_* : Input AXI-Stream CHDR bus
//   - m_axis_* : Output AXI-Stream CHDR bus

module chdr_trim_payload #(
  parameter CHDR_W = 256,
  parameter USER_W = 16
)(
  input  wire              clk,
  input  wire              rst,
  input  wire [CHDR_W-1:0] s_axis_tdata,
  input  wire [USER_W-1:0] s_axis_tuser,
  input  wire              s_axis_tlast,
  input  wire              s_axis_tvalid,
  output wire              s_axis_tready,
  output wire [CHDR_W-1:0] m_axis_tdata,
  output wire [USER_W-1:0] m_axis_tuser,
  output wire              m_axis_tlast,
  output wire              m_axis_tvalid,
  input  wire              m_axis_tready
);

  `include "../core/rfnoc_chdr_utils.vh"

  // Number of address bits that select a byte within one CHDR_W line
  localparam LOG2_CHDR_W_BYTES = $clog2(CHDR_W/8);

  localparam [1:0] ST_HEADER = 2'd0;  // Waiting for / passing the header line
  localparam [1:0] ST_BODY   = 2'd1;  // Passing the remaining lines of the packet
  localparam [1:0] ST_DUMP   = 2'd2;  // Discarding input lines until tlast

  reg [1:0]  state;
  reg [15:0] lines_left;

  // Length field taken from the low 64 bits of the current input line. It is
  // only meaningful while in ST_HEADER, when that line is the header.
  wire [15:0] pkt_length = chdr_get_length(s_axis_tdata[63:0]);
  // Length in bytes converted to CHDR_W lines, rounded up
  wire [15:0] lines_in_pkt = pkt_length[15:LOG2_CHDR_W_BYTES] + (|pkt_length[LOG2_CHDR_W_BYTES-1:0]);
  // lines_left reaches zero on the final line that should be forwarded
  wire        last_line = (lines_left == 16'd0);

  always @(posedge clk) begin
    if (rst) begin
      state      <= ST_HEADER;
      lines_left <= 16'd0;
    end else if (s_axis_tvalid & s_axis_tready) begin
      case (state)
        ST_HEADER: begin
          // If the header line also carries tlast, the packet is already
          // complete: remain in ST_HEADER so that the next line is parsed as
          // a header. Moving to ST_BODY here would underflow lines_left and
          // cause the following packet to be forwarded untrimmed.
          if (!s_axis_tlast) begin
            if (lines_in_pkt == 16'd1) begin
              // First line is valid, dump rest
              state <= ST_DUMP;
            end else begin
              // The header line has been consumed, and last_line fires when
              // lines_left is zero, hence the offset of two.
              lines_left <= lines_in_pkt - 16'd2;
              state      <= ST_BODY;
            end
          end
        end
        ST_BODY: begin
          if (last_line && !s_axis_tlast) begin
            // Reached the advertised length but input continues: drop the rest
            state <= ST_DUMP;
          end else if (s_axis_tlast) begin
            state <= ST_HEADER;
          end else begin
            lines_left <= lines_left - 16'd1;
          end
        end
        ST_DUMP: begin
          if (s_axis_tlast)
            state <= ST_HEADER;
        end
        default: begin
          // We should never get here
          state <= ST_HEADER;
        end
      endcase
    end
  end

  assign m_axis_tdata  = s_axis_tdata;
  assign m_axis_tuser  = s_axis_tuser;
  // tlast is forwarded from the input, or generated early on the line where
  // the advertised packet length is reached.
  assign m_axis_tlast  = s_axis_tlast ||
                         ((state == ST_HEADER) && (lines_in_pkt == 16'd1)) ||
                         ((state == ST_BODY) && last_line);
  // Lines consumed while dumping are never presented downstream
  assign m_axis_tvalid = s_axis_tvalid && (state != ST_DUMP);
  // While dumping, input is accepted regardless of downstream backpressure
  assign s_axis_tready = m_axis_tready || (state == ST_DUMP);

endmodule // chdr_trim_payload
diff --git a/fpga/usrp3/lib/rfnoc/utils/context_builder.v b/fpga/usrp3/lib/rfnoc/utils/context_builder.v new file mode 100644 index 000000000..83171e831 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/context_builder.v @@ -0,0 +1,392 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: context_builder +// +// Description: +// +// This module builds the payload and context data streams necessary for RFnoC +// communication through an AXI-Stream Raw Data (Simple Interface). It takes as +// input an AXI-Stream data bus and sideband buses containing the timestamp and +// packet flags. +// +// For each AXI-Stream raw data packet that is input, the same data packet will +// be output in the payload stream along with the context stream that's +// necessary to create a CHDR packet for this data packet. +// +// The timestamp and flags must be input coincident with the AXI-Stream data +// input. The timestamp and flag inputs will be sampled coincident with the +// last word of data in the packet (i.e., when tlast is asserted). +// +// In order to determine the length of the packet, the entire packet is +// buffered before the header in the context stream is generated. Therefore, +// the internal FIFO size (configured by MTU) must be large enough to buffer +// the maximum packet size. +// +// The maximum number of packets that can be simultaneously buffered in this +// block is limited by INFO_FIFO_SIZE, where the maximum number of packets is +// 2**INFO_FIFO_SIZE. This must be large enough to handle the expected worse +// case, or data flow will stall. 
+// +// Parameters: +// +// CHDR_W : Width of the CHDR interface (width of context words) +// ITEM_W : Width of each sample/item in bits +// NIPC : Number of samples/items per clock cycle +// MTU : Log2 of maximum transfer unit (maximum packet size) in CHDR_W sized words. +// INFO_FIFO_SIZE : Size of the internal packet info FIFO is 2**INFO_FIFO_SIZE +// + +module context_builder #( + parameter CHDR_W = 64, + parameter ITEM_W = 32, + parameter NIPC = 2, + parameter MTU = 10, + parameter INFO_FIFO_SIZE = 5 +) ( + input axis_data_clk, + input axis_data_rst, + + // Data stream in (AXI-Stream) + input wire [(ITEM_W*NIPC)-1:0] s_axis_tdata, + input wire [ NIPC-1:0] s_axis_tkeep, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + // Sideband info (sampled on the last cycle of the packet, i.e., when tlast is asserted) + input wire [ 63:0] s_axis_ttimestamp, + input wire s_axis_thas_time, + input wire s_axis_teov, + input wire s_axis_teob, + + // Data stream out (AXI-Stream Payload) + output wire [(ITEM_W*NIPC)-1:0] m_axis_payload_tdata, + output wire [ NIPC-1:0] m_axis_payload_tkeep, + output wire m_axis_payload_tlast, + output wire m_axis_payload_tvalid, + input wire m_axis_payload_tready, + + // Data stream out (AXI-Stream Context) + output reg [CHDR_W-1:0] m_axis_context_tdata, + output reg [ 3:0] m_axis_context_tuser, + output reg m_axis_context_tlast, + output reg m_axis_context_tvalid = 1'b0, + input wire m_axis_context_tready +); + `include "../core/rfnoc_chdr_utils.vh" + + + reg packet_info_fifo_full; + + + //--------------------------------------------------------------------------- + // Data FIFO + //--------------------------------------------------------------------------- + // + // This FIFO buffers packet data while we calculate each packet's length. 
+ // + //--------------------------------------------------------------------------- + + wire s_axis_tvalid_df; + wire s_axis_tready_df; + + // Compute MTU (maximum packet) size in data words from the CHDR word MTU. + localparam DATA_FIFO_SIZE = MTU + $clog2(CHDR_W) - $clog2(ITEM_W*NIPC); + + axi_fifo #( + .WIDTH (NIPC + 1 + ITEM_W*NIPC), + .SIZE (DATA_FIFO_SIZE) + ) data_fifo ( + .clk (axis_data_clk), + .reset (axis_data_rst), + .clear (1'b0), + .i_tdata ({s_axis_tkeep, s_axis_tlast, s_axis_tdata}), + .i_tvalid (s_axis_tvalid_df), + .i_tready (s_axis_tready_df), + .o_tdata ({m_axis_payload_tkeep, m_axis_payload_tlast, m_axis_payload_tdata}), + .o_tvalid (m_axis_payload_tvalid), + .o_tready (m_axis_payload_tready), + .space (), + .occupied () + ); + + // To prevent the packet info FIFO from overflowing, we block the input of + // new packets to the data FIFO whenever the packet info FIFO fills up. + assign s_axis_tready = s_axis_tready_df & ~packet_info_fifo_full; + assign s_axis_tvalid_df = s_axis_tvalid & ~packet_info_fifo_full; + + + //--------------------------------------------------------------------------- + // Timestamp and Flags Capture + //--------------------------------------------------------------------------- + // + // The timestamp and flags that we use for each packet is that of the last + // data word. This maintains compatibility with how tuser was used on old + // RFnoC. Here, we capture this information when the last word of the packet + // is accepted. Once the packet length is known, this value will be + // inserted into the packet info FIFO. 
+ // + //--------------------------------------------------------------------------- + + reg [63:0] packet_timestamp; + reg packet_has_time; + reg packet_eov; + reg packet_eob; + + always @(posedge axis_data_clk) begin + if (s_axis_tvalid & s_axis_tready & s_axis_tlast) begin + packet_timestamp <= s_axis_ttimestamp; + packet_has_time <= s_axis_thas_time; + packet_eov <= s_axis_teov; + packet_eob <= s_axis_teob; + end + end + + + //--------------------------------------------------------------------------- + // Length Counter + //--------------------------------------------------------------------------- + // + // Here We track the state of the incoming packet to determine its length. + // + //--------------------------------------------------------------------------- + + reg [15:0] packet_length, length_count; + reg packet_length_valid; + + always @(posedge axis_data_clk) begin : length_counter + if (axis_data_rst) begin + length_count <= 0; + packet_length <= 0; + packet_length_valid <= 1'b0; + end else begin : length_counter_main + // Calculate the length of this word in bytes, taking tkeep into account + integer i; + integer num_bytes; + num_bytes = 0; + for (i = 0; i < NIPC; i = i + 1) begin + num_bytes = num_bytes + (s_axis_tkeep[i]*(ITEM_W/8)); + end + + // Update the packet length if the word is accepted + packet_length_valid <= 1'b0; + if (s_axis_tvalid & s_axis_tready) begin + length_count <= length_count + num_bytes; + + if (s_axis_tlast) begin + length_count <= 0; + packet_length <= length_count + num_bytes; + packet_length_valid <= 1'b1; + end + end + end + end + + + //--------------------------------------------------------------------------- + // Packet Info FIFO + //--------------------------------------------------------------------------- + // + // This FIFO stores the packet info (length, timestamp, flags) for each fully + // received packet. Due to AXI-Stream flow control, we may end up with + // multiple packets being buffered in the data_fifo. 
The packet_info_fifo + // here stores each packet's info until the packet is ready to go out. + // + //--------------------------------------------------------------------------- + + wire [63:0] next_packet_timestamp; + wire next_packet_has_time; + wire next_packet_eob; + wire next_packet_eov; + wire [15:0] next_packet_length; + wire [15:0] packet_info_space; + wire packet_info_valid; + reg packet_info_ready = 1'b0; + + axi_fifo #( + .WIDTH (3 + 64 + 16), + .SIZE (INFO_FIFO_SIZE) + ) packet_info_fifo ( + .clk (axis_data_clk), + .reset (axis_data_rst), + .clear (1'b0), + .i_tdata ({packet_eov, + packet_eob, + packet_has_time, + packet_timestamp, + packet_length}), + .i_tvalid (packet_length_valid), + .i_tready (), + .o_tdata ({next_packet_eov, + next_packet_eob, + next_packet_has_time, + next_packet_timestamp, + next_packet_length}), + .o_tvalid (packet_info_valid), + .o_tready (packet_info_ready), + .space (packet_info_space), + .occupied () + ); + + + // Create a register to indicate when the FIFO is (almost) full. We leave + // some space so that we can accept a new packet during the delay before data + // transfer gets blocked. + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + packet_info_fifo_full <= 1'b0; + end else begin + if (packet_info_space < 4) begin + packet_info_fifo_full <= 1'b1; + end else begin + packet_info_fifo_full <= 1'b0; + end + end + end + + + //--------------------------------------------------------------------------- + // Context State Machine + //--------------------------------------------------------------------------- + // + // This state machine controls generation of the context packets (containing + // the header and timestamp) that are output on m_axis_context, which will be + // needed to create the CHDR packet. 
+ // + //--------------------------------------------------------------------------- + + localparam ST_IDLE = 0; + localparam ST_HEADER = 1; + localparam ST_TIMESTAMP = 2; + + reg [ 1:0] state = ST_IDLE; // Current context FSM state + reg [15:0] seq_num = 0; // CHDR sequence number + + reg [15:0] chdr_length; + reg [ 2:0] chdr_pkt_type; + reg [63:0] chdr_header; + + + always @(*) begin : calc_chdr_header + // Calculate byte length of the CHDR packet by adding the header and + // timestamp length to the length of the payload. + if (CHDR_W == 64) begin + // If CHDR_W is 64-bit, timestamp is in a separate word + if (next_packet_has_time) begin + chdr_length = next_packet_length + 16; // Add two 64-bit CHDR words + end else begin + chdr_length = next_packet_length + 8; // Add one 64-bit CHDR word + end + end else begin + // If CHDR_W is 128-bit or larger, timestamp is in the same word as the header + chdr_length = next_packet_length + CHDR_W/8; // Add one CHDR word + end + + // Determine the packet type + if (next_packet_has_time) begin + chdr_pkt_type = CHDR_PKT_TYPE_DATA_TS; + end else begin + chdr_pkt_type = CHDR_PKT_TYPE_DATA; + end + + // Build up header + chdr_header = chdr_build_header( + 6'b0, // vc + next_packet_eob, // eob + next_packet_eov, // eov + chdr_pkt_type, // pkt_type + 0, // num_mdata + seq_num, // seq_num + chdr_length, // length of CHDR packet in bytes + 0 // dst_epid + ); + end + + + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + state <= ST_IDLE; + seq_num <= 'd0; + packet_info_ready <= 1'b0; + m_axis_context_tvalid <= 1'b0; + end else begin + packet_info_ready <= 1'b0; + + if (CHDR_W == 64) begin : gen_ctx_fsm_64 + // For 64-bit CHDR_W, we require two words, one for the header and one + // for the timestamp. 
+ case (state) + ST_IDLE: begin + m_axis_context_tdata <= chdr_header; + m_axis_context_tuser <= CONTEXT_FIELD_HDR; + m_axis_context_tlast <= !next_packet_has_time; + if (packet_info_valid && !packet_info_ready) begin + m_axis_context_tvalid <= 1'b1; + seq_num <= seq_num + 1; + state <= ST_HEADER; + end + end + + ST_HEADER : begin + // Wait for header to be accepted + if (m_axis_context_tready) begin + packet_info_ready <= 1'b1; + m_axis_context_tdata <= next_packet_timestamp; + if (next_packet_has_time) begin + m_axis_context_tlast <= 1'b1; + m_axis_context_tuser <= CONTEXT_FIELD_TS; + state <= ST_TIMESTAMP; + end else begin + m_axis_context_tlast <= 1'b0; + m_axis_context_tvalid <= 1'b0; + state <= ST_IDLE; + end + end + end + + ST_TIMESTAMP : begin + // Wait for timestamp to be accepted + if (m_axis_context_tready) begin + m_axis_context_tvalid <= 1'b0; + state <= ST_IDLE; + end + end + + default: state <= ST_IDLE; + endcase + + end else begin : gen_ctx_fsm_128 + // For 128-bit and larger CHDR_W, we need the header and timestamp in + // the same word. + case (state) + ST_IDLE: begin + m_axis_context_tdata <= { next_packet_timestamp, chdr_header }; + m_axis_context_tuser <= next_packet_has_time ? 
CONTEXT_FIELD_HDR_TS : + CONTEXT_FIELD_HDR; + m_axis_context_tlast <= 1'b1; + if (packet_info_valid) begin + m_axis_context_tvalid <= 1'b1; + seq_num <= seq_num + 1; + packet_info_ready <= 1'b1; + state <= ST_HEADER; + end + end + + ST_HEADER : begin + // Wait for header to be accepted + if (m_axis_context_tready) begin + m_axis_context_tvalid <= 1'b0; + state <= ST_IDLE; + end + end + + default : state <= ST_IDLE; + endcase + + end + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/utils/context_handler_sync.v b/fpga/usrp3/lib/rfnoc/utils/context_handler_sync.v new file mode 100644 index 000000000..c7f899ee9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/context_handler_sync.v @@ -0,0 +1,110 @@ +// +// Copyright 2018-2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: context_handler_sync +// Description: +// +// Parameters: +// - CHDR_W: Width of the input CHDR bus in bits +// +// Signals: +// + +module context_handler_sync #( + parameter CHDR_W = 256, + parameter ITEM_W = 32, + parameter NIPC = 2 +)( + // Clock and reset + input wire clk, + input wire rst, + // Context stream in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_context_tdata, + input wire [3:0] s_axis_context_tuser, + input wire s_axis_context_tlast, + input wire s_axis_context_tvalid, + output wire s_axis_context_tready, + // Context stream out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_context_tdata, + output wire [3:0] m_axis_context_tuser, + output wire m_axis_context_tlast, + output wire m_axis_context_tvalid, + input wire m_axis_context_tready, + // Input payload stream monitor + input wire [NIPC-1:0] in_payload_tkeep, + input wire in_payload_tlast, + input wire in_payload_tvalid, + input wire in_payload_tready, + // Output payload stream monitor + input wire [NIPC-1:0] out_payload_tkeep, + input wire out_payload_tlast, + input wire out_payload_tvalid, + input wire out_payload_tready, + // Status + output reg 
length_err_stb, + output reg seq_err_stb +); + + `include "../core/rfnoc_chdr_utils.vh" + + // Thermometer to binary decoder + // 4'b0000 => 3'd0 + // 4'b0001 => 3'd1 + // 4'b0011 => 3'd2 + // 4'b0111 => 3'd3 + // 4'b1111 => 3'd4 + function [$clog2(NIPC):0] thermo2bin(input [NIPC-1:0] thermo); + reg [NIPC:0] onehot; + integer i; + begin + onehot = thermo + 1; + thermo2bin = 0; + for (i = 0; i <= NIPC; i=i+1) + if (onehot[i]) + thermo2bin = thermo2bin | i; + end + endfunction + + axi_fifo #(.WIDTH(CHDR_W+4+1), .SIZE(1)) ctxt_pipe_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({s_axis_context_tlast, s_axis_context_tuser, s_axis_context_tdata}), + .i_tvalid(s_axis_context_tvalid), .i_tready(s_axis_context_tready), + .o_tdata({m_axis_context_tlast, m_axis_context_tuser, m_axis_context_tdata}), + .o_tvalid(m_axis_context_tvalid), .o_tready(m_axis_context_tready), + .space(), .occupied() + ); + + wire is_ctxt_hdr = s_axis_context_tvalid && s_axis_context_tready && + (s_axis_context_tuser == CONTEXT_FIELD_HDR || + s_axis_context_tuser == CONTEXT_FIELD_HDR_TS); + + reg [15:0] exp_pkt_len = 16'd0; + reg [15:0] exp_seq_num = 16'd0; + reg check_seq_num = 1'b0; + always @(posedge clk) begin + if (rst) begin + exp_pkt_len <= 16'd0; + check_seq_num <= 1'b0; + end else if (is_ctxt_hdr) begin + check_seq_num <= 1'b1; + exp_pkt_len <= chdr_get_length(s_axis_context_tdata[63:0]); + exp_seq_num <= chdr_get_seq_num(s_axis_context_tdata[63:0]) + 16'd1; + end + seq_err_stb <= is_ctxt_hdr && check_seq_num && + (exp_seq_num != chdr_get_seq_num(s_axis_context_tdata[63:0])); + end + + reg [15:0] pyld_pkt_len = 16'd0; + always @(posedge clk) begin + if (rst) begin + pyld_pkt_len <= 16'd0; + end else if (in_payload_tvalid && in_payload_tready) begin + pyld_pkt_len <= in_payload_tlast ? 
16'd0 : (pyld_pkt_len + ((ITEM_W*NIPC)/8)); + end + length_err_stb <= in_payload_tvalid && in_payload_tready && in_payload_tlast && + (pyld_pkt_len + (thermo2bin(in_payload_tkeep)*(ITEM_W/8)) != exp_pkt_len); + end + +endmodule // context_handler_sync diff --git a/fpga/usrp3/lib/rfnoc/utils/context_parser.v b/fpga/usrp3/lib/rfnoc/utils/context_parser.v new file mode 100644 index 000000000..2d0759af7 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/context_parser.v @@ -0,0 +1,230 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: context_parser +// +// Description: +// +// This module extracts the context information from the AXI-Stream Raw Data +// (Simple Interface) in RFNoC and outputs it as sideband information for an +// AXI-Stream data bus. This includes the timestamp, if present, and packet +// flags (EOB, EOV). +// +// For each payload and context packet that is input, one data packet will be +// output along with the sideband data. 
+// +// Parameters: +// +// CHDR_W : Width of the CHDR interface (width of context words) +// ITEM_W : Width of each item/sample +// NIPC : Number of items/samples per clock cycle +// + +module context_parser #( + parameter CHDR_W = 64, + parameter ITEM_W = 32, + parameter NIPC = 2 +) ( + input axis_data_clk, + input axis_data_rst, + + // AXI-Stream Raw Data (Simple Interface) input + input wire [(ITEM_W*NIPC)-1:0] s_axis_payload_tdata, + input wire [ NIPC-1:0] s_axis_payload_tkeep, + input wire s_axis_payload_tlast, + input wire s_axis_payload_tvalid, + output wire s_axis_payload_tready, + // + input wire [ CHDR_W-1:0] s_axis_context_tdata, + input wire s_axis_context_tlast, + input wire s_axis_context_tvalid, + output wire s_axis_context_tready, + + // Data stream out (AXI-Stream) + output wire [(ITEM_W*NIPC)-1:0] m_axis_tdata, + output wire [ NIPC-1:0] m_axis_tkeep, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready, + // Sideband information + output wire [ 63:0] m_axis_ttimestamp, + output wire m_axis_thas_time, + output wire [ 15:0] m_axis_tlength, // Payload length, in bytes + output wire m_axis_teov, + output wire m_axis_teob +); + + `include "../core/rfnoc_chdr_utils.vh" + + + // Sideband-FIFO signals + reg sideband_i_tvalid = 1'b0; + wire sideband_i_tready; + wire sideband_o_tvalid; + wire sideband_o_tready; + + // Sideband data for next packet + reg [63:0] timestamp; + reg has_time; + reg [15:0] length; + reg eov; + reg eob; + + + //--------------------------------------------------------------------------- + // Context State Machine + //--------------------------------------------------------------------------- + // + // This state machine parses the context data so that it can be output as + // sideband information on the AXI-Stream output. + // + // This state machine assumes that the context packet is always properly + // formed (i.e., it doesn't explicitly check for and drop malformed packets). 
+ // + //--------------------------------------------------------------------------- + + localparam ST_HEADER = 0; + localparam ST_TIMESTAMP = 1; + localparam ST_METADATA = 2; + + reg [1:0] state = ST_HEADER; + + always @(posedge axis_data_clk) begin + if (axis_data_rst) begin + state <= ST_HEADER; + sideband_i_tvalid <= 1'b0; + end else begin + sideband_i_tvalid <= 1'b0; + + case(state) + ST_HEADER: begin + // Grab header information + eov <= chdr_get_eov(s_axis_context_tdata[63:0]); + eob <= chdr_get_eob(s_axis_context_tdata[63:0]); + has_time <= chdr_get_has_time(s_axis_context_tdata[63:0]); + length <= chdr_calc_payload_length(CHDR_W, s_axis_context_tdata[63:0]); + + if (s_axis_context_tvalid && s_axis_context_tready) begin + if (CHDR_W > 64) begin + // When CHDR_W > 64, the timestamp is a part of the header word + if (chdr_get_has_time(s_axis_context_tdata[63:0])) begin + timestamp <= s_axis_context_tdata[127:64]; + end + + // Load the sideband data into the FIFO + sideband_i_tvalid <= 1'b1; + + // Check if there's more context packet to wait for + if (!s_axis_context_tlast) begin + state <= ST_METADATA; + end + + end else begin + // When CHDR_W == 64, the timestamp comes after the header word + if (s_axis_context_tlast) begin + // Context packet is ending. Load the sideband data into FIFO. 
+ sideband_i_tvalid <= 1'b1; + end else begin + // More context packet to come + if (chdr_get_has_time(s_axis_context_tdata[63:0])) begin + state <= ST_TIMESTAMP; + end else begin + // Load the sideband data into the FIFO + sideband_i_tvalid <= 1'b1; + state <= ST_METADATA; + end + end + end + end + end + + ST_TIMESTAMP: begin + // This state only applies when CHDR_W == 64 + if (s_axis_context_tvalid && s_axis_context_tready) begin + timestamp <= s_axis_context_tdata; + + // Load the sideband data into the FIFO + sideband_i_tvalid <= 1'b1; + + // Check if there's more context packet to wait for + if (s_axis_context_tlast) begin + state <= ST_HEADER; + end else begin + state <= ST_METADATA; + end + end + end + + ST_METADATA: begin + // This module doesn't handle metadata currently, so just ignore it + if (s_axis_context_tvalid && s_axis_context_tready) begin + if (s_axis_context_tlast) begin + state <= ST_HEADER; + end + end + end + + default: state <= ST_HEADER; + endcase + end + end + + + //--------------------------------------------------------------------------- + // Sideband Data FIFO + //--------------------------------------------------------------------------- + // + // Here we buffer the sideband information into a FIFO. The information will + // be output coincident with the corresponding data packet. 
+ // + //--------------------------------------------------------------------------- + + axi_fifo_short #( + .WIDTH (83) + ) sideband_fifo ( + .clk (axis_data_clk), + .reset (axis_data_rst), + .clear (1'b0), + .i_tdata ({length, eob, eov, has_time, timestamp}), + .i_tvalid (sideband_i_tvalid), + .i_tready (sideband_i_tready), + .o_tdata ({m_axis_tlength, m_axis_teob, m_axis_teov, + m_axis_thas_time, m_axis_ttimestamp}), + .o_tvalid (sideband_o_tvalid), + .o_tready (sideband_o_tready), + .space (), + .occupied () + ); + + + //--------------------------------------------------------------------------- + // Payload Transfer Logic + //--------------------------------------------------------------------------- + // + // Here we handle the logic for AXI-Stream flow control. The data and + // sideband information are treated as a single AXI-Stream bus. The sideband + // information is output for the duration of the packet and is popped off of + // the sideband FIFO at the end of each packet. + // + //--------------------------------------------------------------------------- + + // We can only accept context info when there's room in the sideband FIFO. 
+ assign s_axis_context_tready = sideband_i_tready; + + // Allow payload transfer whenever the sideband info is valid + assign s_axis_payload_tready = (m_axis_tready & sideband_o_tvalid); + assign m_axis_tvalid = (s_axis_payload_tvalid & sideband_o_tvalid); + + // Pop off the sideband info at the end of each packet + assign sideband_o_tready = (s_axis_payload_tready & + s_axis_payload_tvalid & + s_axis_payload_tlast); + + // Other AXI-Stream signals pass through untouched + assign m_axis_tdata = s_axis_payload_tdata; + assign m_axis_tkeep = s_axis_payload_tkeep; + assign m_axis_tlast = s_axis_payload_tlast; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_clk_cross.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_clk_cross.v new file mode 100644 index 000000000..6aa74c74f --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_clk_cross.v @@ -0,0 +1,167 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport_clk_cross +// +// Description: +// +// Crosses a CTRL Port request and response between two clock domains. 
+// + + +module ctrlport_clk_cross ( + + input wire rst, // Can be either clock domain, but must be glitch-free + + //--------------------------------------------------------------------------- + // Input Clock Domain (Slave Interface) + //--------------------------------------------------------------------------- + + input wire s_ctrlport_clk, + input wire s_ctrlport_req_wr, + input wire s_ctrlport_req_rd, + input wire [19:0] s_ctrlport_req_addr, + input wire [ 9:0] s_ctrlport_req_portid, + input wire [15:0] s_ctrlport_req_rem_epid, + input wire [ 9:0] s_ctrlport_req_rem_portid, + input wire [31:0] s_ctrlport_req_data, + input wire [ 3:0] s_ctrlport_req_byte_en, + input wire s_ctrlport_req_has_time, + input wire [63:0] s_ctrlport_req_time, + output wire s_ctrlport_resp_ack, + output wire [ 1:0] s_ctrlport_resp_status, + output wire [31:0] s_ctrlport_resp_data, + + //--------------------------------------------------------------------------- + // Output Clock Domain (Master Interface) + //--------------------------------------------------------------------------- + + input wire m_ctrlport_clk, + output wire m_ctrlport_req_wr, + output wire m_ctrlport_req_rd, + output wire [19:0] m_ctrlport_req_addr, + output wire [ 9:0] m_ctrlport_req_portid, + output wire [15:0] m_ctrlport_req_rem_epid, + output wire [ 9:0] m_ctrlport_req_rem_portid, + output wire [31:0] m_ctrlport_req_data, + output wire [ 3:0] m_ctrlport_req_byte_en, + output wire m_ctrlport_req_has_time, + output wire [63:0] m_ctrlport_req_time, + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data +); + + //--------------------------------------------------------------------------- + // Slave to Master Clock Crossing (Request) + //--------------------------------------------------------------------------- + + localparam REQ_W = + 1 + // ctrlport_req_wr + 1 + // ctrlport_req_rd + 20 + // ctrlport_req_addr + 10 + // ctrlport_req_portid + 16 + // 
ctrlport_req_rem_epid + 10 + // ctrlport_req_rem_portid + 32 + // ctrlport_req_data + 4 + // ctrlport_req_byte_en + 1 + // ctrlport_req_has_time + 64; // ctrlport_req_time + + wire [ REQ_W-1:0] s_req_flat; + wire [ REQ_W-1:0] m_req_flat; + wire m_req_flat_valid; + wire m_ctrlport_req_wr_tmp; + wire m_ctrlport_req_rd_tmp; + + assign s_req_flat = { + s_ctrlport_req_wr, + s_ctrlport_req_rd, + s_ctrlport_req_addr, + s_ctrlport_req_portid, + s_ctrlport_req_rem_epid, + s_ctrlport_req_rem_portid, + s_ctrlport_req_data, + s_ctrlport_req_byte_en, + s_ctrlport_req_has_time, + s_ctrlport_req_time + }; + + axi_fifo_2clk #( + .WIDTH (REQ_W), + .SIZE (3) + ) req_fifo ( + .reset (rst), + .i_aclk (s_ctrlport_clk), + .i_tdata (s_req_flat), + .i_tvalid (s_ctrlport_req_wr | s_ctrlport_req_rd), + .i_tready (), + .o_aclk (m_ctrlport_clk), + .o_tdata (m_req_flat), + .o_tready (1'b1), + .o_tvalid (m_req_flat_valid) + ); + + assign { + m_ctrlport_req_wr_tmp, + m_ctrlport_req_rd_tmp, + m_ctrlport_req_addr, + m_ctrlport_req_portid, + m_ctrlport_req_rem_epid, + m_ctrlport_req_rem_portid, + m_ctrlport_req_data, + m_ctrlport_req_byte_en, + m_ctrlport_req_has_time, + m_ctrlport_req_time + } = m_req_flat; + + assign m_ctrlport_req_wr = m_ctrlport_req_wr_tmp & m_req_flat_valid; + assign m_ctrlport_req_rd = m_ctrlport_req_rd_tmp & m_req_flat_valid; + + + //--------------------------------------------------------------------------- + // Master to Slave Clock Crossing (Response) + //--------------------------------------------------------------------------- + + localparam RESP_W = + 1 + // ctrlport_resp_ack, + 2 + // ctrlport_resp_status, + 32; // ctrlport_resp_data + + wire [RESP_W-1:0] m_resp_flat; + wire [RESP_W-1:0] s_resp_flat; + wire s_resp_flat_valid; + wire s_ctrlport_resp_ack_tmp; + + assign m_resp_flat = { + m_ctrlport_resp_ack, + m_ctrlport_resp_status, + m_ctrlport_resp_data + }; + + axi_fifo_2clk #( + .WIDTH (RESP_W), + .SIZE (3) + ) resp_fifo ( + .reset (rst), + .i_aclk 
(m_ctrlport_clk), + .i_tdata (m_resp_flat), + .i_tvalid (m_ctrlport_resp_ack), + .i_tready (), + .o_aclk (s_ctrlport_clk), + .o_tdata (s_resp_flat), + .o_tready (1'b1), + .o_tvalid (s_resp_flat_valid) + ); + + assign { + s_ctrlport_resp_ack_tmp, + s_ctrlport_resp_status, + s_ctrlport_resp_data + } = s_resp_flat; + + assign s_ctrlport_resp_ack = s_ctrlport_resp_ack_tmp & s_resp_flat_valid; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_combiner.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_combiner.v new file mode 100644 index 000000000..591fadc27 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_combiner.v @@ -0,0 +1,222 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport_combiner +// +// Description: +// +// This block is an arbiter that merges control-port interfaces. This block is +// used when you have multiple control-port masters that need to access a +// single slave. For example, a NoC block with multiple submodules that each +// need to read and/or write registers outside of themselves. +// +// This module combines the control-port requests from multiple masters into a +// single request for one slave. Simultaneous requests are handled in the order +// specified by PRIORITY. The responding ACK is routed back to the requester. +// +// Parameters: +// +// NUM_MASTERS : The number of control-port masters to connect to a single +// control-port slave. +// PRIORITY : Use PRIORITY = 0 for round robin arbitration, PRIORITY = 1 +// for priority arbitration (lowest number port serviced first). 
+// + + +module ctrlport_combiner #( + parameter NUM_MASTERS = 2, + parameter PRIORITY = 0 +) ( + input wire ctrlport_clk, + input wire ctrlport_rst, + + // Requests from multiple masters + input wire [ NUM_MASTERS-1:0] s_ctrlport_req_wr, + input wire [ NUM_MASTERS-1:0] s_ctrlport_req_rd, + input wire [20*NUM_MASTERS-1:0] s_ctrlport_req_addr, + input wire [10*NUM_MASTERS-1:0] s_ctrlport_req_portid, + input wire [16*NUM_MASTERS-1:0] s_ctrlport_req_rem_epid, + input wire [10*NUM_MASTERS-1:0] s_ctrlport_req_rem_portid, + input wire [32*NUM_MASTERS-1:0] s_ctrlport_req_data, + input wire [ 4*NUM_MASTERS-1:0] s_ctrlport_req_byte_en, + input wire [ NUM_MASTERS-1:0] s_ctrlport_req_has_time, + input wire [64*NUM_MASTERS-1:0] s_ctrlport_req_time, + // Responses to multiple masters + output reg [ NUM_MASTERS-1:0] s_ctrlport_resp_ack, + output reg [ 2*NUM_MASTERS-1:0] s_ctrlport_resp_status, + output reg [32*NUM_MASTERS-1:0] s_ctrlport_resp_data, + + // Request to a single slave + output reg m_ctrlport_req_wr, + output reg m_ctrlport_req_rd, + output reg [19:0] m_ctrlport_req_addr, + output reg [ 9:0] m_ctrlport_req_portid, + output reg [15:0] m_ctrlport_req_rem_epid, + output reg [ 9:0] m_ctrlport_req_rem_portid, + output reg [31:0] m_ctrlport_req_data, + output reg [ 3:0] m_ctrlport_req_byte_en, + output reg m_ctrlport_req_has_time, + output reg [63:0] m_ctrlport_req_time, + // Response from a single slave + input wire m_ctrlport_resp_ack, + input wire [ 1:0] m_ctrlport_resp_status, + input wire [31:0] m_ctrlport_resp_data +); + + reg [$clog2(NUM_MASTERS)-1:0] slave_sel = 0; // Tracks which slave port is + // currently being serviced. + reg req_load_output = 1'b0; + + + //--------------------------------------------------------------------------- + // Input Registers + //--------------------------------------------------------------------------- + // + // Latch each request until it can be serviced. Only one request per slave + // can be in progress at a time. 
+ // + //--------------------------------------------------------------------------- + + reg [ NUM_MASTERS-1:0] req_valid = 0; + reg [ NUM_MASTERS-1:0] req_wr; + reg [ NUM_MASTERS-1:0] req_rd; + reg [20*NUM_MASTERS-1:0] req_addr; + reg [10*NUM_MASTERS-1:0] req_portid; + reg [16*NUM_MASTERS-1:0] req_rem_epid; + reg [10*NUM_MASTERS-1:0] req_rem_portid; + reg [32*NUM_MASTERS-1:0] req_data; + reg [ 4*NUM_MASTERS-1:0] req_byte_en; + reg [ NUM_MASTERS-1:0] req_has_time; + reg [64*NUM_MASTERS-1:0] req_time; + + always @(posedge ctrlport_clk) begin + if (ctrlport_rst) begin + req_valid <= 0; + end else begin : input_reg_gen + integer i; + for (i = 0; i < NUM_MASTERS; i = i + 1) begin + if (s_ctrlport_req_wr[i] | s_ctrlport_req_rd[i]) begin + // Mark this slave's request valid and save the request information + req_valid[i] <= 1'b1; + req_wr[i] <= s_ctrlport_req_wr[i]; + req_rd[i] <= s_ctrlport_req_rd[i]; + req_addr[20*i+:20] <= s_ctrlport_req_addr[20*i+:20]; + req_portid[10*i+:10] <= s_ctrlport_req_portid[10*i+:10]; + req_rem_epid[16*i+:16] <= s_ctrlport_req_rem_epid[16*i+:16]; + req_rem_portid[10*i+:10] <= s_ctrlport_req_rem_portid[10*i+:10]; + req_data[32*i+:32] <= s_ctrlport_req_data[32*i+:32]; + req_byte_en[4*i+:4] <= s_ctrlport_req_byte_en[4*i+:4]; + req_has_time[i] <= s_ctrlport_req_has_time[i]; + req_time[64*i+:64] <= s_ctrlport_req_time[64*i+:64]; + end + end + + // Clear the active request when it gets output + if (req_load_output) begin + req_valid[slave_sel] <= 1'b0; + end + end + end + + + //--------------------------------------------------------------------------- + // Arbitration State Machine + //--------------------------------------------------------------------------- + // + // This state machine tracks which slave port is being serviced and which to + // service next. This is done using a counter that simply checks each port in + // sequential order and then stops when it finds one that has a valid request. 
+ // + //--------------------------------------------------------------------------- + + reg req_active = 0; // Indicates if there's a request being serviced + + always @(posedge ctrlport_clk) begin + if (ctrlport_rst) begin + slave_sel <= 0; + req_active <= 1'b0; + req_load_output <= 1'b0; + end else begin + req_load_output <= 1'b0; + + if (req_active) begin + // Wait until we get the response before we allow another request + if (m_ctrlport_resp_ack) begin + req_active <= 1'b0; + + // Go to the next slave so we don't service the same slave again + if(PRIORITY == 1 || slave_sel == NUM_MASTERS-1) + slave_sel <= 0; + else + slave_sel <= slave_sel + 1; + end + end else begin + // No active request in progress, so check if there's a new request on + // the selected slave. + if (req_valid[slave_sel]) begin + req_active <= 1'b1; + req_load_output <= 1'b1; + end else begin + // Nothing from this slave, so move to the next slave. + if (slave_sel == NUM_MASTERS-1) + slave_sel <= 0; + else + slave_sel <= slave_sel + 1; + end + end + end + end + + + //--------------------------------------------------------------------------- + // Output Register + //--------------------------------------------------------------------------- + // + // Here we load the active request for a single clock cycle and demultiplex + // the response back to the requesting master. 
+ // + //--------------------------------------------------------------------------- + + always @(posedge ctrlport_clk) begin + if (ctrlport_rst) begin + m_ctrlport_req_wr <= 1'b0; + m_ctrlport_req_rd <= 1'b0; + end else begin : output_reg_gen + integer i; + + // Load the active request + if (req_load_output) begin + m_ctrlport_req_wr <= req_wr [slave_sel]; + m_ctrlport_req_rd <= req_rd [slave_sel]; + m_ctrlport_req_addr <= req_addr [20*slave_sel +: 20]; + m_ctrlport_req_portid <= req_portid [10*slave_sel +: 10]; + m_ctrlport_req_rem_epid <= req_rem_epid [16*slave_sel +: 16]; + m_ctrlport_req_rem_portid <= req_rem_portid[10*slave_sel +: 10]; + m_ctrlport_req_data <= req_data [32*slave_sel +: 32]; + m_ctrlport_req_byte_en <= req_byte_en [ 4*slave_sel +: 4]; + m_ctrlport_req_has_time <= req_has_time [slave_sel]; + m_ctrlport_req_time <= req_time [64*slave_sel +: 64]; + end else begin + m_ctrlport_req_wr <= 1'b0; + m_ctrlport_req_rd <= 1'b0; + end + + // Output any response to the master that made the request + for (i = 0; i < NUM_MASTERS; i = i + 1) begin + // Give the response data to all the slaves (no demux, to save logic) + s_ctrlport_resp_status[2*i +: 2] <= m_ctrlport_resp_status; + s_ctrlport_resp_data[32*i +: 32] <= m_ctrlport_resp_data; + + // Give the ack only to the master that made the request (use a demux) + if (i == slave_sel && m_ctrlport_resp_ack) begin + s_ctrlport_resp_ack[i] <= 1'b1; + end else begin + s_ctrlport_resp_ack[i] <= 1'b0; + end + end + end + end + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder.v new file mode 100644 index 000000000..74cdb307a --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder.v @@ -0,0 +1,151 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: ctrlport_decoder +// +// Description: +// +// This block splits a single control port interface into multiple. 
It is used +// when you have a single master that needs to access multiple slaves. For +// example, a NoC block where the registers are implemented in multiple +// submodules that must be read/written by a single NoC shell. +// +// This version also implements address decoding. The request is passed to a +// slave only if the address falls within that slave's address space. Each +// slave is given an address space of 2**SLAVE_ADDR_W and the first slave +// starts at address BASE_ADDR. In other words, the request address is +// partitioned as shown below. +// +// |---------------- 20-bit -----------------| +// | Base | Port Num | Slave Addr | +// |-----------------------------------------| +// +// When passed to the slave, the base address and port number bits are stripped +// from the request address and only the SLAVE_ADDR_W-bit address is passed +// through. +// +// Parameters: +// +// NUM_SLAVES : Number of slave devices that you want to connect to master. +// BASE_ADDR : Base address for slave 0. This should be a power-of-2 +// multiple of the combined slave address spaces. +// SLAVE_ADDR_W : Number of address bits to allocate to each slave.
+//
+
+module ctrlport_decoder #(
+  parameter NUM_SLAVES   = 2,
+  parameter BASE_ADDR    = 0,
+  parameter SLAVE_ADDR_W = 8
+) (
+  input wire ctrlport_clk,
+  input wire ctrlport_rst,
+
+  // Slave Interface
+  input  wire        s_ctrlport_req_wr,
+  input  wire        s_ctrlport_req_rd,
+  input  wire [19:0] s_ctrlport_req_addr,
+  input  wire [31:0] s_ctrlport_req_data,
+  input  wire [ 3:0] s_ctrlport_req_byte_en,
+  input  wire        s_ctrlport_req_has_time,
+  input  wire [63:0] s_ctrlport_req_time,
+  output reg         s_ctrlport_resp_ack = 1'b0,
+  output reg  [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data,
+
+  // Master Interfaces
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_wr = 0,
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_rd = 0,
+  output reg  [20*NUM_SLAVES-1:0] m_ctrlport_req_addr = 0,
+  output reg  [32*NUM_SLAVES-1:0] m_ctrlport_req_data,
+  output reg  [ 4*NUM_SLAVES-1:0] m_ctrlport_req_byte_en,
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_has_time,
+  output reg  [64*NUM_SLAVES-1:0] m_ctrlport_req_time,
+  input  wire [   NUM_SLAVES-1:0] m_ctrlport_resp_ack,
+  input  wire [ 2*NUM_SLAVES-1:0] m_ctrlport_resp_status,
+  input  wire [32*NUM_SLAVES-1:0] m_ctrlport_resp_data
+);
+
+  // Layout of the 20-bit request address, from LSB to MSB:
+  //   [SLAVE_ADDR_W-1 : 0]          address passed on to the selected slave
+  //   [PORT_NUM_POS +: PORT_NUM_W]  index of the slave being addressed
+  //   [19 : BASE_ADDR_POS]          bits that must equal BASE_ADDR
+  localparam PORT_NUM_W     = $clog2(NUM_SLAVES);
+  localparam PORT_NUM_POS   = SLAVE_ADDR_W;
+  localparam BASE_ADDR_W    = 20 - (SLAVE_ADDR_W + PORT_NUM_W);
+  localparam BASE_ADDR_POS  = SLAVE_ADDR_W + PORT_NUM_W;
+  // Ones in the base-address bit positions, zeros everywhere else
+  localparam [19:0] BASE_ADDR_MASK = { BASE_ADDR_W {1'b1}} << BASE_ADDR_POS;
+
+
+  //---------------------------------------------------------------------------
+  // Split the requests among the slaves
+  //---------------------------------------------------------------------------
+
+  // decoder[i] is high while the current request address selects slave i
+  wire [NUM_SLAVES-1:0] decoder;
+
+  genvar i;
+  for (i = 0; i < NUM_SLAVES; i = i+1) begin : gen_split
+    // Check if the upper bits of the request address match each slave. If the
+    // address matches, set the corresponding decoder[] bit.
+    //
+    // The masked address is compared against the complete BASE_ADDR value, so
+    // a match is only possible when BASE_ADDR has no bits set below
+    // BASE_ADDR_POS.
+    if (PORT_NUM_W == 0) begin
+      // Only one port in this case, so there are no port number bits to check
+      assign decoder[i] = ((s_ctrlport_req_addr & BASE_ADDR_MASK) == BASE_ADDR);
+    end else begin
+      assign decoder[i] = ((s_ctrlport_req_addr & BASE_ADDR_MASK) == BASE_ADDR) &&
+                          (s_ctrlport_req_addr[PORT_NUM_POS +: PORT_NUM_W] == i);
+    end
+
+    // The request is registered, so it reaches the slave one clock cycle after
+    // it is presented on the slave interface of this module.
+    always @(posedge ctrlport_clk) begin
+      if (ctrlport_rst) begin
+        m_ctrlport_req_wr[i] <= 1'b0;
+        m_ctrlport_req_rd[i] <= 1'b0;
+      end else begin
+        // Mask WR and RD based on address decoding
+        m_ctrlport_req_wr[i] <= s_ctrlport_req_wr & decoder[i];
+        m_ctrlport_req_rd[i] <= s_ctrlport_req_rd & decoder[i];
+
+        // Other values pass through to all slaves, but should be ignored
+        // unless the corresponding WR or RD is asserted.
+        m_ctrlport_req_data    [32*i +: 32] <= s_ctrlport_req_data;
+        m_ctrlport_req_byte_en [4*i +: 4]   <= s_ctrlport_req_byte_en;
+        m_ctrlport_req_has_time[i]          <= s_ctrlport_req_has_time;
+        m_ctrlport_req_time    [64*i +: 64] <= s_ctrlport_req_time;
+
+        // Pass through only the relevant slave bits. The order of these two
+        // statements matters: the second non-blocking assignment overrides
+        // the low SLAVE_ADDR_W bits that the first one cleared, leaving the
+        // upper bits of the 20-bit slave address at zero.
+        m_ctrlport_req_addr[20*i+:20] <= 20'b0;
+        m_ctrlport_req_addr[20*i+:SLAVE_ADDR_W] <= s_ctrlport_req_addr[SLAVE_ADDR_W-1:0];
+      end
+    end
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Decode the responses
+  //---------------------------------------------------------------------------
+
+  // Combinational OR of all the slave responses
+  reg [31:0] data;
+  reg [ 1:0] status;
+  reg        ack = 0;
+
+  // Take the responses and mask them with ack, then OR them together. A
+  // response whose ack is low contributes nothing to the result. No response
+  // is generated by this module itself, so a request that matches no slave
+  // is never acknowledged here.
+  always @(*) begin : comb_decode
+    integer s;
+    data   = 0;
+    status = 0;
+    ack    = 0;
+    for (s = 0; s < NUM_SLAVES; s = s+1) begin
+      data   = data   | (m_ctrlport_resp_data  [s*32 +: 32] & {32{m_ctrlport_resp_ack[s]}});
+      status = status | (m_ctrlport_resp_status[s* 2 +:  2] & { 2{m_ctrlport_resp_ack[s]}});
+      ack    = ack    | m_ctrlport_resp_ack[s];
+    end
+  end
+
+  // Register the output to break combinatorial path. Only the ack is cleared
+  // by reset; data and status hold their previous values during reset.
+  always @(posedge ctrlport_clk) begin
+    if (ctrlport_rst) begin
+      s_ctrlport_resp_ack <= 0;
+    end else begin
+      s_ctrlport_resp_data   <= data;
+      s_ctrlport_resp_status <= status;
+      s_ctrlport_resp_ack    <= ack;
+    end
+  end
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder_param.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder_param.v
new file mode 100644
index 000000000..f2f4a438c
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_decoder_param.v
@@ -0,0 +1,169 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: ctrlport_decoder_param
+//
+// Description:
+//
+//   This block splits a single control port interface into multiple. It is
+//   used when you have a single master that needs to access multiple slaves.
+//   For example, a NoC block where the registers are implemented in multiple
+//   submodules that must be read/written by a single NoC shell.
+//
+//   This version also implements address decoding. The request is passed to a
+//   slave only if the address falls within that slave's address space. Each
+//   slave can have a unique base address and address space size. The address
+//   space is broken up as follows.
+//
+//   PORT_BASE[0*20 +: 20] = Port 0 base address
+//     │ ┐
+//     │ ├── 2**PORT_ADDR_W[0*32 +: 32] bytes for slave 0
+//     │ ┘
+//     .
+//     .
+//   PORT_BASE[1*20 +: 20] = Port 1 base address
+//     │ ┐
+//     │ ├── 2**PORT_ADDR_W[1*32 +: 32] bytes for slave 1
+//     │ ┘
+//     .
+//     .
+//
+//   When passed to the slave, the base address is stripped from the request
+//   address so that only the PORT_ADDR_W-bit address is passed through.
+//
+// Parameters:
+//
+//   NUM_SLAVES  : The number of slaves to connect to a master.
+//
+//   PORT_BASE   : Base addresses to use for each slave. This is a
+//                 concatenation of 20-bit addresses, where the right-most
+//                 (least-significant) 20 bits corresponds to slave 0. Each
+//                 address must be a multiple of 2**PORT_ADDR_W, where
+//                 PORT_ADDR_W is the number of address bits allocated to that
+//                 slave.
+//
+//   PORT_ADDR_W : Number of address bits to allocate to each slave. This is a
+//                 concatenation of 32-bit integers, where the right-most
+//                 (least-significant) 32 bits corresponds to the address space
+//                 for slave 0.
+//
+
+module ctrlport_decoder_param #(
+  parameter NUM_SLAVES  = 4,
+  parameter PORT_BASE   = { 20'h300, 20'h200, 20'h100, 20'h000 },
+  parameter PORT_ADDR_W = {   32'd8,   32'd8,   32'd8,   32'd8 }
+) (
+  input wire ctrlport_clk,
+  input wire ctrlport_rst,
+
+  // Slave Interface
+  input  wire        s_ctrlport_req_wr,
+  input  wire        s_ctrlport_req_rd,
+  input  wire [19:0] s_ctrlport_req_addr,
+  input  wire [31:0] s_ctrlport_req_data,
+  input  wire [ 3:0] s_ctrlport_req_byte_en,
+  input  wire        s_ctrlport_req_has_time,
+  input  wire [63:0] s_ctrlport_req_time,
+  output reg         s_ctrlport_resp_ack = 1'b0,
+  output reg  [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data,
+
+  // Master Interfaces
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_wr = 0,
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_rd = 0,
+  output reg  [20*NUM_SLAVES-1:0] m_ctrlport_req_addr = 0,
+  output reg  [32*NUM_SLAVES-1:0] m_ctrlport_req_data,
+  output reg  [ 4*NUM_SLAVES-1:0] m_ctrlport_req_byte_en,
+  output reg  [   NUM_SLAVES-1:0] m_ctrlport_req_has_time,
+  output reg  [64*NUM_SLAVES-1:0] m_ctrlport_req_time,
+  input  wire [   NUM_SLAVES-1:0] m_ctrlport_resp_ack,
+  input  wire [ 2*NUM_SLAVES-1:0] m_ctrlport_resp_status,
+  input  wire [32*NUM_SLAVES-1:0] m_ctrlport_resp_data
+);
+
+  //---------------------------------------------------------------------------
+  // Address Decode Logic
+  //---------------------------------------------------------------------------
+  //
+  // Check if the upper bits of the request address match each slave. If the
+  // address matches, set the corresponding dec_mask[] bit.
+  //
+  //---------------------------------------------------------------------------
+
+  wire [NUM_SLAVES-1:0] dec_mask;    // Address decoder mask
+
+  genvar i;
+
+  for (i = 0; i < NUM_SLAVES; i = i+1) begin : gen_dec_mask
+    // Base address and address width of slave i, sliced out of the
+    // concatenated parameters
+    localparam [19:0] BASE_ADDR = PORT_BASE  [i*20 +: 20];
+    localparam [31:0] ADDR_W    = PORT_ADDR_W[i*32 +: 32];
+    // The XOR leaves a 1 in every bit where the request address differs from
+    // the base address. (~0) << ADDR_W is all ones with the low ADDR_W bits
+    // cleared, so the AND discards differences inside the slave's own address
+    // range. The reduction NOR is then 1 only if all upper bits are equal.
+    assign dec_mask[i] = ~|((s_ctrlport_req_addr ^ BASE_ADDR) & ((~0) << ADDR_W));
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Split the requests among the slaves
+  //---------------------------------------------------------------------------
+
+  for (i = 0; i < NUM_SLAVES; i = i+1) begin : gen_split
+    // Number of address bits allocated to slave i
+    localparam [31:0] ADDR_W = PORT_ADDR_W[i*32 +: 32];
+
+    // The request is registered, so it reaches the slave one clock cycle after
+    // it is presented on the slave interface of this module.
+    always @(posedge ctrlport_clk) begin
+      if (ctrlport_rst) begin
+        m_ctrlport_req_wr[i] <= 1'b0;
+        m_ctrlport_req_rd[i] <= 1'b0;
+      end else begin
+        // Mask WR and RD based on address decoding
+        m_ctrlport_req_wr[i] <= s_ctrlport_req_wr & dec_mask[i];
+        m_ctrlport_req_rd[i] <= s_ctrlport_req_rd & dec_mask[i];
+
+        // Other values pass through to all slaves, but should be ignored
+        // unless WR or RD is asserted.
+        m_ctrlport_req_data    [32*i +: 32] <= s_ctrlport_req_data;
+        m_ctrlport_req_byte_en [4*i +: 4]   <= s_ctrlport_req_byte_en;
+        m_ctrlport_req_has_time[i]          <= s_ctrlport_req_has_time;
+        m_ctrlport_req_time    [64*i +: 64] <= s_ctrlport_req_time;
+
+        // Mask the address bits to that of the slave's address space. The
+        // order of these two statements matters: the second non-blocking
+        // assignment overrides the low ADDR_W bits that the first one
+        // cleared, leaving the upper bits of the slave address at zero.
+        m_ctrlport_req_addr[20*i +: 20]     <= 20'b0;
+        m_ctrlport_req_addr[20*i +: ADDR_W] <= s_ctrlport_req_addr[ADDR_W-1 : 0];
+      end
+    end
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Decode the responses
+  //---------------------------------------------------------------------------
+
+  // Combinational OR of all the slave responses
+  reg [31:0] data;
+  reg [ 1:0] status;
+  reg        ack = 0;
+
+  // Take the responses and mask them with ack, then OR them together. A
+  // response whose ack is low contributes nothing to the result. No response
+  // is generated by this module itself, so a request that matches no slave
+  // is never acknowledged here.
+  always @(*) begin : comb_decode
+    integer s;
+    data   = 0;
+    status = 0;
+    ack    = 0;
+    for (s = 0; s < NUM_SLAVES; s = s+1) begin
+      data   = data   | (m_ctrlport_resp_data  [s*32 +: 32] & {32{m_ctrlport_resp_ack[s]}});
+      status = status | (m_ctrlport_resp_status[s* 2 +:  2] & { 2{m_ctrlport_resp_ack[s]}});
+      ack    = ack    | m_ctrlport_resp_ack[s];
+    end
+  end
+
+  // Register the output to break combinatorial path. Only the ack is cleared
+  // by reset; data and status hold their previous values during reset.
+  always @(posedge ctrlport_clk) begin
+    if (ctrlport_rst) begin
+      s_ctrlport_resp_ack <= 0;
+    end else begin
+      s_ctrlport_resp_data   <= data;
+      s_ctrlport_resp_status <= status;
+      s_ctrlport_resp_ack    <= ack;
+    end
+  end
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_ro.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_ro.v
new file mode 100644
index 000000000..c21667358
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_ro.v
@@ -0,0 +1,181 @@
+//
+// Copyright 2019 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: ctrlport_reg_ro
+//
+// Description:
+//
+//   Implements a read-only register on a CTRL Port bus. The actual register
+//   bits are driven from outside of this module and passed in through the
+//   "value_in" input port. All input addresses are assumed to be 32-bit word
+//   aligned.
+//
+//   The width of the register is configurable. The register will take up the
+//   full power-of-2 address region, with a minimum of a 4-byte region.
For
+//   example:
+//
+//     WIDTH (Bits)  │ Address Space (Bytes)
+//    ──────────────┼───────────────────────
+//      1 to 32      │          4
+//      33 to 64     │          8
+//      65 to 128    │          16
+//      etc.         │         etc.
+//
+//   When COHERENT is true and the WIDTH is larger than a single CTRL Port word
+//   (32 bits), reading the least-significant word of the register causes the
+//   other words of the register to be read and saved in a cache register on
+//   the same clock cycle. Reading the upper words of the register will always
+//   read from the cached copy. This allows reads of large, multi-word
+//   registers to be coherent. This is very important for registers in which
+//   there is a relationship between the upper and lower bits, such as in a
+//   counter which could change or roll over between 32-bit reads. The
+//   least-significant word MUST always be read first when COHERENT is true.
+//
+// Parameters:
+//
+//   ADDR     : Byte address to use for this register. This address must be
+//              aligned to the size of the register.
+//   WIDTH    : Width of register to implement in bits. This determines the
+//              width of the "value_in" input and the amount of address space
+//              used by the register, which is always a power of 2.
+//   COHERENT : Setting to 1 implements additional logic so that register reads
+//              maintain coherency. Setting to 0 removes this logic, so that
+//              each 32-bit word of the register is treated independently.
+//
+// Ports:
+//
+//   *ctrlport* : CTRL Port interface.
+//   value_in   : The current value of the register.
+//
+
+
+module ctrlport_reg_ro #(
+  parameter [ 19:0] ADDR     = 0,
+  parameter         WIDTH    = 32,
+  parameter         COHERENT = 0
+) (
+  input wire ctrlport_clk,
+
+  input  wire        s_ctrlport_req_rd,
+  input  wire [19:0] s_ctrlport_req_addr,
+  output reg         s_ctrlport_resp_ack,
+  output wire [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data,
+
+  input wire [WIDTH-1:0] value_in
+);
+
+  //---------------------------------------------------------------------------
+  // Functions
+  //---------------------------------------------------------------------------
+
+  function automatic integer max(input integer a, b);
+    max = a > b ? a : b;
+  endfunction
+
+
+  //---------------------------------------------------------------------------
+  // Local Parameters
+  //---------------------------------------------------------------------------
+
+  // Calculate the number of bytes of address space this register will take up.
+  // The minimum size is a 32-bit register (4 bytes).
+  localparam NUM_BYTES = max(4, 2**$clog2(WIDTH) / 8);
+
+  // Calculate the number of bits needed to index each byte of this register.
+  localparam BYTE_ADDR_W = $clog2(NUM_BYTES);
+
+  // Calculate the number of bits needed to index each 32-bit word of this
+  // register.
+  localparam WORD_ADDR_W = BYTE_ADDR_W-2;
+
+  // Width used when slicing the word index out of the request address. This is
+  // clamped to at least 1 so that the part-selects below remain legal during
+  // elaboration when WORD_ADDR_W is 0 (they are unreachable in that case).
+  localparam WORD_SEL_W = max(1, WORD_ADDR_W);
+
+
+  //---------------------------------------------------------------------------
+  // Parameter Checking
+  //---------------------------------------------------------------------------
+
+  // Make sure WIDTH is valid
+  if (WIDTH < 1) begin
+    WIDTH_must_be_at_least_1();
+  end
+
+  // Make sure the address is word-aligned to the size of the register
+  if (ADDR[BYTE_ADDR_W-1:0] != 0) begin
+    ADDR_must_be_aligned_to_the_size_of_the_register();
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Resize Input Value
+  //---------------------------------------------------------------------------
+
+  // Use full size to simplify indexing. Unused bits will be optimized away.
+  reg [NUM_BYTES*8-1:0] reg_val = 0;
+
+  // Zero-extend value_in to the full register size. Blocking assignments are
+  // used because this is combinational logic.
+  always @(*) begin
+    reg_val            = 0;
+    reg_val[WIDTH-1:0] = value_in;
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Read Logic
+  //---------------------------------------------------------------------------
+
+  // Snapshot of the register used for coherent reads. It is as wide as the
+  // padded register (not just WIDTH) so that every 32-bit word that can be
+  // addressed lies within range, even when WIDTH is not a multiple of 32.
+  reg [NUM_BYTES*8-1:0] cache_reg;
+
+  assign s_ctrlport_resp_status = 0;  // Status is always "OK" (0)
+
+  //
+  // Coherent implementation
+  //
+  if (WIDTH > 32 && COHERENT) begin : gen_coherent
+    // In this case we want the upper bits, when read separately, to be
+    // coherent with the lower bits. So we register the upper bits when the
+    // least-significant word is read.
+
+    always @(posedge ctrlport_clk) begin
+      // Check if any part of this register is being addressed
+      if (s_ctrlport_req_addr[19 : BYTE_ADDR_W] == ADDR[19 : BYTE_ADDR_W] && s_ctrlport_req_rd) begin
+        s_ctrlport_resp_ack <= 1'b1;
+
+        // Check if we're reading the least-significant word
+        if (s_ctrlport_req_addr[BYTE_ADDR_W-1 : 2] == 0) begin
+          s_ctrlport_resp_data <= reg_val[31:0];
+          cache_reg            <= reg_val;  // Unused bits will be optimized away
+
+        // Otherwise, grab the word that's being addressed from the cached value
+        end else begin
+          s_ctrlport_resp_data <= cache_reg[s_ctrlport_req_addr[2 +: WORD_SEL_W]*32 +: 32];
+        end
+      end else begin
+        s_ctrlport_resp_ack <= 1'b0;
+      end
+    end
+
+  //
+  // Non-coherent implementation
+  //
+  end else begin : gen_no_coherent
+    // In this case, coherency is not required, so we just return the word
+    // that's being addressed.
+
+    always @(posedge ctrlport_clk) begin
+      // Check if any part of this register is being addressed
+      if (s_ctrlport_req_addr[19 : BYTE_ADDR_W] == ADDR[19 : BYTE_ADDR_W] && s_ctrlport_req_rd) begin
+        s_ctrlport_resp_ack <= 1'b1;
+        if (WORD_ADDR_W > 0) begin
+          // Read back only the word of the register being addressed
+          s_ctrlport_resp_data <= reg_val[s_ctrlport_req_addr[2 +: WORD_SEL_W]*32 +: 32];
+        end else begin
+          // The register is a single word in this case
+          s_ctrlport_resp_data <= reg_val[31:0];
+        end
+      end else begin
+        s_ctrlport_resp_ack <= 1'b0;
+      end
+    end
+  end
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_rw.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_rw.v
new file mode 100644
index 000000000..7e74b1422
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_reg_rw.v
@@ -0,0 +1,247 @@
+//
+// Copyright 2019 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: ctrlport_reg_rw
+//
+// Description:
+//
+//   Implements a read/write register on a CTRL Port bus. CTRL Port byte
+//   enables are supported on writes. All input addresses are assumed to be
+//   32-bit word aligned.
+//
+//   The width of the register is configurable. The register will take up the
+//   full power-of-2 address region, with a minimum of a 4-byte region. For
+//   example:
+//
+//     WIDTH (Bits)  │ Address Space (Bytes)
+//    ──────────────┼───────────────────────
+//      1 to 32      │          4
+//      33 to 64     │          8
+//      65 to 128    │          16
+//      etc.         │         etc.
+//
+//   When COHERENT is true and the WIDTH is larger than a single CTRL Port
+//   word (32 bits), writing the least-significant words of the register causes
+//   them to be saved in a cache register and does not immediately update those
+//   words in the register. Writing the most-significant word of the register
+//   causes all the words to be simultaneously written to the register. This
+//   allows writes of large, multi-word registers to be coherent.
This is very
+//   important for registers in which there is a relationship between the upper
+//   and lower bits, such as in a counter value in which changing only part of
+//   the word at a time could be seen as a large change when in fact the final
+//   change is small. The most-significant word MUST always be written last
+//   when COHERENT is true.
+//
+// Parameters:
+//
+//   ADDR      : Byte address to use for this register. This address must be
+//               aligned to the size of the register.
+//   WIDTH     : Width of register to implement in bits. This determines the
+//               width of the "value_out" output and the amount of address space
+//               used by the register, which is always a power of 2.
+//   COHERENT  : Setting to 1 implements additional logic so that register
+//               writes maintain coherency. Setting to 0 removes this logic, so
+//               that each 32-bit word of the register is treated independently.
+//   RESET_VAL : Value to give the register at power-on and at reset.
+//
+// Ports:
+//
+//   *ctrlport* : CTRL Port interface.
+//   value_out  : The current value of the register.
+//   written    : A strobe (single-cycle pulse) that indicates when the
+//                register was written. The new value may or may not be the
+//                same as the old value.
+//
+
+
+module ctrlport_reg_rw #(
+  parameter [     19:0] ADDR      = 0,
+  parameter             WIDTH     = 32,
+  parameter             COHERENT  = 0,
+  parameter [WIDTH-1:0] RESET_VAL = 'h0
+) (
+  input wire ctrlport_clk,
+  input wire ctrlport_rst,
+
+  input  wire        s_ctrlport_req_wr,
+  input  wire        s_ctrlport_req_rd,
+  input  wire [19:0] s_ctrlport_req_addr,
+  input  wire [31:0] s_ctrlport_req_data,
+  input  wire [ 3:0] s_ctrlport_req_byte_en,
+  output wire        s_ctrlport_resp_ack,
+  output wire [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data,
+
+  output wire [WIDTH-1:0] value_out,
+  output reg              written
+);
+
+  //---------------------------------------------------------------------------
+  // Functions
+  //---------------------------------------------------------------------------
+
+  function automatic integer max(input integer a, b);
+    max = a > b ? a : b;
+  endfunction
+
+
+  //---------------------------------------------------------------------------
+  // Local Parameters
+  //---------------------------------------------------------------------------
+
+  // Calculate the number of bytes of address space this register will take up.
+  // The minimum size is a 32-bit register (4 bytes).
+  localparam NUM_BYTES = max(4, 2**$clog2(WIDTH)/8);
+
+  // Number of 32-bit words of address space this register takes up.
+  localparam NUM_WORDS = NUM_BYTES/4;
+
+  // Calculate the number of bits needed to index each byte of this register.
+  localparam BYTE_ADDR_W = $clog2(NUM_BYTES);
+
+  // Calculate the number of bits needed to index each 32-bit word of this
+  // register.
+  localparam WORD_ADDR_W = BYTE_ADDR_W-2;
+
+  // Width used when slicing the word index out of the request address. "max"
+  // is needed by Vivado to elaborate when WORD_ADDR_W is 0, in which case the
+  // part-selects that use it are unreachable.
+  localparam WORD_SEL_W = max(1, WORD_ADDR_W);
+
+
+  //---------------------------------------------------------------------------
+  // Parameter Checking
+  //---------------------------------------------------------------------------
+
+  // Make sure WIDTH is valid
+  if (WIDTH < 1) begin
+    WIDTH_must_be_at_least_1();
+  end
+
+  // Make sure the address is word-aligned to the size of the register
+  if (ADDR[BYTE_ADDR_W-1:0] != 0) begin
+    ADDR_must_be_aligned_to_the_size_of_the_register();
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Write Logic
+  //---------------------------------------------------------------------------
+
+  // Use full size to simplify indexing. Unused bits will be optimized away.
+  // The power-on value is RESET_VAL, the same value that reset loads.
+  reg [8*NUM_BYTES-1:0] reg_val = RESET_VAL;
+
+  // Data and byte enables of the lower words that have been written but not
+  // yet committed to reg_val (coherent implementation only). A set bit in
+  // write_en_cache_reg marks the corresponding byte of write_cache_reg as
+  // pending.
+  reg [8*NUM_BYTES-1:0] write_cache_reg;
+  reg [  NUM_BYTES-1:0] write_en_cache_reg = 0;
+
+  reg s_ctrlport_resp_ack_wr = 1'b0;
+
+  integer b, w;
+
+  //
+  // Coherent implementation
+  //
+  if (WIDTH > 32 && COHERENT) begin : gen_coherent
+    always @(posedge ctrlport_clk) begin
+      if (ctrlport_rst) begin
+        reg_val                <= RESET_VAL;
+        written                <= 1'b0;
+        s_ctrlport_resp_ack_wr <= 1'b0;
+        // Discard pending writes so they can't be committed after the reset
+        write_en_cache_reg     <= 0;
+      end else begin
+        // By default there is no ack and no write strobe. These are overridden
+        // below, which keeps "written" a single-cycle pulse even when writes
+        // arrive on consecutive clock cycles.
+        s_ctrlport_resp_ack_wr <= 1'b0;
+        written                <= 1'b0;
+
+        // Check if any part of this register is being written to
+        if (s_ctrlport_req_addr[19 : BYTE_ADDR_W] == ADDR[19 : BYTE_ADDR_W] && s_ctrlport_req_wr) begin
+          s_ctrlport_resp_ack_wr <= 1'b1;
+
+          // Check if we're writing the most-significant word
+          if (s_ctrlport_req_addr[BYTE_ADDR_W-1 : 2] == {BYTE_ADDR_W-2{1'b1}}) begin
+            written <= 1'b1;
+
+            // Update the most-significant word from the input, one byte at a
+            // time based on byte_en
+            for (b = 0; b < 4; b = b+1) begin
+              if (s_ctrlport_req_byte_en[b]) begin
+                reg_val[32*(NUM_WORDS-1)+b*8 +: 8] <= s_ctrlport_req_data[8*b +: 8];
+              end
+            end
+
+            // Update the less-significant words from the cache. The
+            // most-significant word is excluded because it never goes through
+            // the cache, and each byte is qualified by the byte enable that
+            // was cached for that same word.
+            for (w = 0; w < NUM_WORDS-1; w = w+1) begin
+              for (b = 0; b < 4; b = b+1) begin
+                if (write_en_cache_reg[4*w+b]) begin
+                  reg_val[32*w+b*8 +: 8] <= write_cache_reg[32*w+b*8 +: 8];
+                end
+              end
+            end
+
+            // Everything pending has now been committed
+            write_en_cache_reg <= 0;
+
+          // We're writing one of the least-significant words, so just cache
+          // the values written.
+          end else begin
+            w = s_ctrlport_req_addr[2 +: WORD_SEL_W];
+            write_cache_reg[w*32 +: 32]  <= s_ctrlport_req_data;
+            write_en_cache_reg[w*4 +: 4] <= s_ctrlport_req_byte_en;
+          end
+        end
+      end
+    end
+
+  //
+  // Non-coherent implementation
+  //
+  end else begin : gen_no_coherent
+    always @(posedge ctrlport_clk) begin
+      if (ctrlport_rst) begin
+        reg_val                <= RESET_VAL;
+        written                <= 1'b0;
+        s_ctrlport_resp_ack_wr <= 1'b0;
+      end else begin
+        // Check if any part of the word is being written to
+        if (s_ctrlport_req_addr[19 : BYTE_ADDR_W] == ADDR[19 : BYTE_ADDR_W] && s_ctrlport_req_wr) begin
+          for (b = 0; b < 4; b = b + 1) begin
+            if (s_ctrlport_req_byte_en[b]) begin
+              if (WORD_ADDR_W > 0) begin
+                // Update only the word of the register being addressed
+                w = s_ctrlport_req_addr[2 +: WORD_SEL_W];
+                reg_val[w*32+b*8 +: 8] <= s_ctrlport_req_data[8*b +: 8];
+              end else begin
+                reg_val[b*8 +: 8] <= s_ctrlport_req_data[8*b +: 8];
+              end
+            end
+          end
+          s_ctrlport_resp_ack_wr <= 1'b1;
+          written                <= 1'b1;
+        end else begin
+          s_ctrlport_resp_ack_wr <= 1'b0;
+          written                <= 1'b0;
+        end
+      end
+    end
+
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Read Logic
+  //---------------------------------------------------------------------------
+
+  reg s_ctrlport_resp_ack_rd = 1'b0;
+
+  assign s_ctrlport_resp_status = 0;  // Status is always "OK" (0)
+
+  assign value_out = reg_val[WIDTH-1:0];
+
+  // Because the register is only changed by software, read coherency is not
+  // required, so we just return the word that's being addressed.
+  always @(posedge ctrlport_clk) begin
+    // Check if any part of this register is being addressed
+    if (s_ctrlport_req_addr[19 : BYTE_ADDR_W] == ADDR[19 : BYTE_ADDR_W] && s_ctrlport_req_rd) begin
+      s_ctrlport_resp_ack_rd <= 1'b1;
+      if (WORD_ADDR_W > 0) begin
+        // Read back only the word of the register being addressed
+        s_ctrlport_resp_data <= reg_val[s_ctrlport_req_addr[2 +: WORD_SEL_W]*32 +: 32];
+      end else begin
+        s_ctrlport_resp_data <= reg_val[31:0];
+      end
+    end else begin
+      s_ctrlport_resp_ack_rd <= 1'b0;
+    end
+  end
+
+  // Combine read/write ack
+  assign s_ctrlport_resp_ack = s_ctrlport_resp_ack_wr | s_ctrlport_resp_ack_rd;
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_resp_combine.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_resp_combine.v
new file mode 100644
index 000000000..e3461cb2c
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_resp_combine.v
@@ -0,0 +1,70 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: ctrlport_resp_combine
+//
+// Description:
+//
+// This module combines the control-port responses from multiple slave blocks
+// into a single response for the master. This is done by using ack bit to
+// mask all bits of the responses then ORing all the results together onto a
+// single response bus. This is valid because only one block is allowed to
+// respond to a single request.
+//
+// Note that no special logic is required to split the requests from the
+// master among multiple slaves. A single master request interface can be
+// directly connected to all the slaves without issue.
+//
+// Parameters:
+//
+//   NUM_SLAVES : The number of slaves you want to connect to a master.
+//
+
+
+module ctrlport_resp_combine #(
+  parameter NUM_SLAVES = 2
+) (
+  input wire ctrlport_clk,
+  input wire ctrlport_rst,
+
+  // Responses from multiple slaves
+  input  wire [   NUM_SLAVES-1:0] m_ctrlport_resp_ack,
+  input  wire [ 2*NUM_SLAVES-1:0] m_ctrlport_resp_status,
+  input  wire [32*NUM_SLAVES-1:0] m_ctrlport_resp_data,
+
+  // Response to a single master
+  output reg         s_ctrlport_resp_ack,
+  output reg  [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data
+);
+
+  //---------------------------------------------------------------------------
+  // Merge the responses
+  //---------------------------------------------------------------------------
+
+  // Combinational merge of all the slave responses
+  reg [31:0] merged_data;
+  reg [ 1:0] merged_status;
+  reg        merged_ack;
+
+  // Gate each slave's data and status with its own ack, so that slaves which
+  // are not responding contribute all zeros, then OR everything together.
+  always @(*) begin : merge_resp
+    integer slave;
+    merged_data   = 0;
+    merged_status = 0;
+    merged_ack    = 0;
+    for (slave = 0; slave < NUM_SLAVES; slave = slave+1) begin
+      merged_data   = merged_data   | ({32{m_ctrlport_resp_ack[slave]}} & m_ctrlport_resp_data  [32*slave +: 32]);
+      merged_status = merged_status | ({ 2{m_ctrlport_resp_ack[slave]}} & m_ctrlport_resp_status[ 2*slave +:  2]);
+      merged_ack    = merged_ack    | m_ctrlport_resp_ack[slave];
+    end
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Output register
+  //---------------------------------------------------------------------------
+
+  // The merged response is registered to break the combinatorial path. Reset
+  // clears the data and the ack; the status holds its value during reset.
+  always @(posedge ctrlport_clk) begin
+    if (ctrlport_rst) begin
+      s_ctrlport_resp_data <= 0;
+      s_ctrlport_resp_ack  <= 0;
+    end else begin
+      s_ctrlport_resp_data   <= merged_data;
+      s_ctrlport_resp_status <= merged_status;
+      s_ctrlport_resp_ack    <= merged_ack;
+    end
+  end
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_splitter.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_splitter.v
new file mode 100644
index 000000000..23ef13585
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_splitter.v
@@ -0,0 +1,114 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: ctrlport_splitter
+//
+// Description:
+//
+// This block splits a single control port interface into multiple. It is used
+// when you have a single master that needs to access multiple slaves. For
+// example, a NoC block where the registers are implemented in multiple
+// submodules that must be read/written by a single NoC shell.
+//
+// Note that this block does not do any address decoding, so the connected
+// slaves must use non-overlapping address spaces.
+//
+// This module takes the request received by its single slave interface and
+// outputs it on all its master interfaces. In the opposite direction, it takes
+// the responses received by its multiple master interfaces and combines them
+// into a single response on its slave interface. This is done by using the ack
+// bit of each response to mask the other bits of the response, then OR'ing all
+// of the masked responses together onto a single response bus. This is valid
+// because only one block is allowed to respond to a single request.
+//
+// Parameters:
+//
+//   NUM_SLAVES : The number of slaves you want to connect to a master.
+//
+
+
+module ctrlport_splitter #(
+  parameter NUM_SLAVES = 2
+) (
+  input wire ctrlport_clk,
+  input wire ctrlport_rst,
+
+  // Slave Interface
+  input  wire        s_ctrlport_req_wr,
+  input  wire        s_ctrlport_req_rd,
+  input  wire [19:0] s_ctrlport_req_addr,
+  input  wire [31:0] s_ctrlport_req_data,
+  input  wire [ 3:0] s_ctrlport_req_byte_en,
+  input  wire        s_ctrlport_req_has_time,
+  input  wire [63:0] s_ctrlport_req_time,
+  output reg         s_ctrlport_resp_ack = 1'b0,
+  output reg  [ 1:0] s_ctrlport_resp_status,
+  output reg  [31:0] s_ctrlport_resp_data,
+
+  // Master Interfaces
+  output wire [   NUM_SLAVES-1:0] m_ctrlport_req_wr,
+  output wire [   NUM_SLAVES-1:0] m_ctrlport_req_rd,
+  output wire [20*NUM_SLAVES-1:0] m_ctrlport_req_addr,
+  output wire [32*NUM_SLAVES-1:0] m_ctrlport_req_data,
+  output wire [ 4*NUM_SLAVES-1:0] m_ctrlport_req_byte_en,
+  output wire [   NUM_SLAVES-1:0] m_ctrlport_req_has_time,
+  output wire [64*NUM_SLAVES-1:0] m_ctrlport_req_time,
+  input  wire [   NUM_SLAVES-1:0] m_ctrlport_resp_ack,
+  input  wire [ 2*NUM_SLAVES-1:0] m_ctrlport_resp_status,
+  input  wire [32*NUM_SLAVES-1:0] m_ctrlport_resp_data
+);
+
+  //---------------------------------------------------------------------------
+  // Split the requests among the slaves
+  //---------------------------------------------------------------------------
+
+  // Every slave receives an identical, unregistered copy of the request, so
+  // each master-side bus is simply the slave-side request replicated
+  // NUM_SLAVES times.
+  assign m_ctrlport_req_wr       = {NUM_SLAVES{s_ctrlport_req_wr}};
+  assign m_ctrlport_req_rd       = {NUM_SLAVES{s_ctrlport_req_rd}};
+  assign m_ctrlport_req_addr     = {NUM_SLAVES{s_ctrlport_req_addr}};
+  assign m_ctrlport_req_data     = {NUM_SLAVES{s_ctrlport_req_data}};
+  assign m_ctrlport_req_byte_en  = {NUM_SLAVES{s_ctrlport_req_byte_en}};
+  assign m_ctrlport_req_has_time = {NUM_SLAVES{s_ctrlport_req_has_time}};
+  assign m_ctrlport_req_time     = {NUM_SLAVES{s_ctrlport_req_time}};
+
+  //---------------------------------------------------------------------------
+  // Decode the responses
+  //---------------------------------------------------------------------------
+
+  // Combinational merge of all the slave responses
+  reg [31:0] merged_data;
+  reg [ 1:0] merged_status;
+  reg        merged_ack;
+
+  // Gate each slave's data and status with its own ack, so that slaves which
+  // are not responding contribute all zeros, then OR everything together.
+  always @(*) begin : merge_resp
+    integer slave;
+    merged_data   = 0;
+    merged_status = 0;
+    merged_ack    = 0;
+    for (slave = 0; slave < NUM_SLAVES; slave = slave+1) begin
+      merged_data   = merged_data   | ({32{m_ctrlport_resp_ack[slave]}} & m_ctrlport_resp_data  [32*slave +: 32]);
+      merged_status = merged_status | ({ 2{m_ctrlport_resp_ack[slave]}} & m_ctrlport_resp_status[ 2*slave +:  2]);
+      merged_ack    = merged_ack    | m_ctrlport_resp_ack[slave];
+    end
+  end
+
+  // The merged response is registered to break the combinatorial path. Reset
+  // only clears the ack; data and status hold their values during reset.
+  always @(posedge ctrlport_clk) begin
+    if (ctrlport_rst) begin
+      s_ctrlport_resp_ack <= 0;
+    end else begin
+      s_ctrlport_resp_data   <= merged_data;
+      s_ctrlport_resp_status <= merged_status;
+      s_ctrlport_resp_ack    <= merged_ack;
+    end
+  end
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_terminator.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_terminator.v
new file mode 100644
index 000000000..2d087e53e
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_terminator.v
@@ -0,0 +1,50 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: ctrlport_terminator.v
// Description:
//   Returns an error for all ctrlport requests in given address range.
//
//   Any read or write whose address lies in [START_ADDRESS, LAST_ADDRESS] is
//   acknowledged one clock cycle later. The status output is tied to
//   CTRL_STS_CMDERR and the data output is tied to zero. Requests outside the
//   range are never acknowledged by this module.
//
// Parameters:
//   START_ADDRESS : First address to generate a response for.
//   LAST_ADDRESS  : Last address (inclusive) to generate a response for.

module ctrlport_terminator #(
  parameter START_ADDRESS = 0, // first address to generate response for
  parameter LAST_ADDRESS = 32  // last address (including) to generate response for
)(
  //---------------------------------------------------------------
  // ControlPort slave
  //---------------------------------------------------------------
  input  wire        ctrlport_clk,
  input  wire        ctrlport_rst,
  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,

  output reg         s_ctrlport_resp_ack,
  output wire [ 1:0] s_ctrlport_resp_status,
  output wire [31:0] s_ctrlport_resp_data
);

`include "../core/ctrlport.vh"
//vhook_nowarn s_ctrlport_req_addr
//vhook_nowarn s_ctrlport_req_data

// drive acknowledgement on requests but not on reset
//
// The block is edge-triggered on ctrlport_clk, so the clock must not also be
// tested as a level inside it: that check is always true after a rising edge
// and only makes the clock look like a data signal to lint/synthesis tools.
always @(posedge ctrlport_clk) begin
  if (ctrlport_rst) begin
    s_ctrlport_resp_ack <= 1'b0;
  end else if ((s_ctrlport_req_addr >= START_ADDRESS) && (s_ctrlport_req_addr <= LAST_ADDRESS)) begin
    // Address is inside the terminated window: acknowledge any read or write
    s_ctrlport_resp_ack <= s_ctrlport_req_wr | s_ctrlport_req_rd;
  end else begin
    // Address belongs to somebody else: stay silent
    s_ctrlport_resp_ack <= 1'b0;
  end
end

// other outputs are fixed
assign s_ctrlport_resp_status = CTRL_STS_CMDERR;
assign s_ctrlport_resp_data   = { CTRLPORT_DATA_W {1'b0}};

endmodule
\ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_timer.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_timer.v
new file mode 100644
index 000000000..293ee6559
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_timer.v
@@ -0,0 +1,122 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: ctrlport_timer
// Description:
//   The Control-Port timer module converts an asynchronous timed
//   transaction into a synchronous blocking transaction. This
//   module will use the input req_has_time and req_time fields and
//   produce an output transaction that will execute when the requested
//   time is current. The module does not pass the has_time and time
//   signals out because they are no longer relevant. The current time
//   is an input to this module, and must be a monotonic counter that
//   updates every time the time strobe is asserted.
//
// Parameters:
//   - PRECISION_BITS : The number of bits to ignore when performing a
//                      time comparison to determine execution time.
//   - EXEC_LATE_CMDS : If a command is late, a TSERR response is sent.
//                      If EXEC_LATE_CMDS = 1, then the late command will
//                      be passed to the output regardless of the TSERR.
//
// Signals:
//   - time_now*    : The time_now signal is the current time and the stb
//                    signal indicates that the time_now is valid.
//   - s_ctrlport_* : The slave Control-Port bus.
//                    This must have the has_time and time signals.
//   - m_ctrlport_* : The master Control-Port bus.
//                    This will not have the has_time and time signals.

module ctrlport_timer #(
  parameter       PRECISION_BITS = 0,
  parameter [0:0] EXEC_LATE_CMDS = 0
)(
  // Clocks and Resets
  input  wire        clk,
  input  wire        rst,
  // Timestamp (synchronous to clk)
  input  wire [63:0] time_now,
  input  wire        time_now_stb,
  // Control Port Slave (Request)
  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  input  wire [3:0]  s_ctrlport_req_byte_en,
  input  wire        s_ctrlport_req_has_time,
  input  wire [63:0] s_ctrlport_req_time,
  // Control Port Slave (Response)
  output wire        s_ctrlport_resp_ack,
  output wire [1:0]  s_ctrlport_resp_status,
  output wire [31:0] s_ctrlport_resp_data,
  // Control Port Master (Request)
  output wire        m_ctrlport_req_wr,
  output wire        m_ctrlport_req_rd,
  output wire [19:0] m_ctrlport_req_addr,
  output wire [31:0] m_ctrlport_req_data,
  output wire [3:0]  m_ctrlport_req_byte_en,
  // Control Port Master (Response)
  input  wire        m_ctrlport_resp_ack,
  input  wire [1:0]  m_ctrlport_resp_status,
  input  wire [31:0] m_ctrlport_resp_data
);

  `include "../core/rfnoc_chdr_utils.vh"
  `include "../core/rfnoc_axis_ctrl_utils.vh"

  //-------------------------------------------------------------------------
  // Request holding stage
  //-------------------------------------------------------------------------

  // Fields of the command currently held in the request cache
  wire        held_wr, held_rd;
  wire [19:0] held_addr;
  wire [31:0] held_data;
  wire [3:0]  held_byte_en;
  wire        held_has_time;
  wire [63:0] held_time;

  // cmd_pending : the request cache holds a command
  // cmd_consume : pop the held command from the request cache
  wire cmd_pending;
  wire cmd_consume;

  axi_fifo_flop #(.WIDTH(1+1+20+32+4+1+64)) req_cache_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata({s_ctrlport_req_wr, s_ctrlport_req_rd, s_ctrlport_req_addr, s_ctrlport_req_data,
              s_ctrlport_req_byte_en, s_ctrlport_req_has_time, s_ctrlport_req_time}),
    .i_tvalid(s_ctrlport_req_wr | s_ctrlport_req_rd), .i_tready(),
    .o_tdata({held_wr, held_rd, held_addr, held_data,
              held_byte_en, held_has_time, held_time}),
    .o_tvalid(cmd_pending), .o_tready(cmd_consume),
    .occupied(), .space()
  );

  //-------------------------------------------------------------------------
  // Scheduling decisions
  //-------------------------------------------------------------------------

  // A timed command is held and a fresh time_now value is available
  wire timed_cmd_check = held_has_time && cmd_pending && time_now_stb;

  // The held timestamp matches the current time (low PRECISION_BITS ignored)
  wire cmd_ontime = timed_cmd_check &&
    (held_time[63:PRECISION_BITS] == time_now[63:PRECISION_BITS]);

  // The held timestamp is already in the past (low PRECISION_BITS ignored)
  wire cmd_late = timed_cmd_check &&
    (held_time[63:PRECISION_BITS] < time_now[63:PRECISION_BITS]);

  // Forward the command: untimed, on time, or late with late execution enabled
  wire cmd_exec = cmd_pending &&
    (!held_has_time || cmd_ontime || (EXEC_LATE_CMDS && cmd_late));

  // A late command that is not forwarded is answered locally with TSERR
  wire cmd_late_dropped = cmd_late && !cmd_exec;

  // The held command leaves the cache when it is forwarded or found late
  assign cmd_consume = cmd_exec || cmd_late;

  //-------------------------------------------------------------------------
  // Master-side request
  //-------------------------------------------------------------------------

  // Strobes are qualified with cmd_exec; the payload fields are driven
  // straight from the cache.
  assign m_ctrlport_req_addr    = held_addr;
  assign m_ctrlport_req_data    = held_data;
  assign m_ctrlport_req_byte_en = held_byte_en;
  assign m_ctrlport_req_wr      = held_wr & cmd_exec;
  assign m_ctrlport_req_rd      = held_rd & cmd_exec;

  //-------------------------------------------------------------------------
  // Response holding stage
  //-------------------------------------------------------------------------

  // Status is TSERR for a dropped late command, otherwise whatever the
  // downstream slave returned.
  wire [1:0] resp_status = cmd_late_dropped ? AXIS_CTRL_STS_TSERR : m_ctrlport_resp_status;

  axi_fifo_flop #(.WIDTH(2+32)) resp_cache_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata({resp_status, m_ctrlport_resp_data}),
    .i_tvalid(m_ctrlport_resp_ack || cmd_late_dropped), .i_tready(),
    .o_tdata({s_ctrlport_resp_status, s_ctrlport_resp_data}),
    .o_tvalid(s_ctrlport_resp_ack), .o_tready(s_ctrlport_resp_ack),
    .occupied(), .space()
  );

endmodule // ctrlport_timer

diff --git a/fpga/usrp3/lib/rfnoc/utils/ctrlport_to_settings_bus.v b/fpga/usrp3/lib/rfnoc/utils/ctrlport_to_settings_bus.v
new file mode 100644
index 000000000..121b0ea40
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/ctrlport_to_settings_bus.v
@@ -0,0 +1,241 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: ctrlport_to_settings_bus
//
// Description:
//
//   Converts CTRL port interface requests to a user register settings bus
//   access. This can be used to connect RFNoC block IP settings registers and
//   user read-back registers to a control port.
//
//   There are a few key differences between control port and the settings bus
//   that need to be accounted for.
//
//     * Control port uses byte address whereas the settings bus uses a
//       word address.
//     * Control port is 32-bit whereas the settings bus supports
//       32-bit writes and 64-bit reads.
//     * The settings bus always does both a write and a read for each
//       transaction. If the intent is to read a register, then it writes the
//       address for the read to SR_RB_ADDR. If the intent is to write a
//       register, then the read result is ignored.
//
//   This block handles these differences by allocating a 2048-byte address
//   space to each settings bus. Each word of the settings bus is treated like
//   a 64-bit word on an eight-byte boundary. To write to a settings register
//   N, simply write a 32-bit value to address N*8.
//   To read read-back register
//   N, simply perform a 32-bit read from address N*8 followed by a 32-bit read
//   from address N*8+4 to get the full 64-bits. If only the lower 32-bits are
//   needed then it is not necessary to read the upper 32 bits. Note however,
//   that software must always read the lower 32-bits before trying to read the
//   upper 32-bits and that these reads should be atomic (no intervening reads
//   should occur).
//
// Parameters:
//
//   NUM_PORTS  : The number of settings buses you wish to connect
//
//   SR_RB_ADDR : Address to use for the settings register that holds the
//                read-back address. Set to 124 to model register access to
//                user logic registers. Set to 127 to model access to internal
//                NoC shell registers.
//
//   USE_TIME   : When 0, timestamps are simply passed from ctrlport to
//                settings bus and the timestamp input is not used. When 1,
//                this block will wait until the indicated time to arrive on
//                the timestamp input before issuing the transaction on the
//                settings bus. In this case, the time must be provided on the
//                timestamp input.
//

module ctrlport_to_settings_bus #(
  parameter NUM_PORTS  = 1,
  parameter SR_RB_ADDR = 124,
  parameter USE_TIME   = 0
) (
  input wire ctrlport_clk,
  input wire ctrlport_rst,

  //---------------------------------------------------------------------------
  // CTRL Port Interface
  //---------------------------------------------------------------------------

  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  input  wire        s_ctrlport_req_has_time,
  input  wire [63:0] s_ctrlport_req_time,
  output reg         s_ctrlport_resp_ack = 0,
  output reg  [31:0] s_ctrlport_resp_data,

  //---------------------------------------------------------------------------
  // Settings Bus Interface
  //---------------------------------------------------------------------------

  // One slice per port, packed port 0 in the least-significant bits
  output wire [NUM_PORTS*32-1:0] set_data,
  output wire [ NUM_PORTS*8-1:0] set_addr,
  output reg  [   NUM_PORTS-1:0] set_stb = 0,
  output wire [NUM_PORTS*64-1:0] set_time,
  output wire [   NUM_PORTS-1:0] set_has_time,

  input  [NUM_PORTS-1:0]    rb_stb,
  output [NUM_PORTS*8-1:0]  rb_addr,
  input  [NUM_PORTS*64-1:0] rb_data,

  //---------------------------------------------------------------------------
  // Timestamp
  //---------------------------------------------------------------------------

  // Current timestamp, synchronous to ctrlport_clk
  input wire [63:0] timestamp
);

  // Width of the port index; forced to 1 when there is only a single port so
  // that the vectors below never have zero width.
  localparam PORT_W = (NUM_PORTS > 1) ? $clog2(NUM_PORTS) : 1;

  wire [PORT_W-1:0] port_num;      // Port addressed by the current request
  reg  [PORT_W-1:0] port_num_reg;  // Port of the transaction in flight

  wire msw_access;

  // Values driven (identically) onto every settings bus
  reg [31:0] set_data_reg;
  reg [ 7:0] set_addr_reg;
  reg [63:0] set_time_reg;
  reg        set_has_time_reg;
  reg [ 7:0] rb_addr_reg;

  // Upper 32 bits of rb_data captured in ST_WAIT_RESP, returned by a later
  // read with address bit 2 set.
  reg [31:0] upper_word;

  // Extract the port index from the address (the bits above the lower 11 bits)
  assign port_num = (NUM_PORTS > 1) ? s_ctrlport_req_addr[(PORT_W+11)-1:11] : 0;

  // Determine if the upper word is being accessed
  assign msw_access = s_ctrlport_req_addr[2];

  // State encoding. Only values 0..3 are used although the state register is
  // three bits wide and the case statement below has no default branch.
  localparam ST_IDLE        = 0;  // Waiting for a CtrlPort request
  localparam ST_TIME_CHECK  = 1;  // Holding a timed request until it is due
  localparam ST_STROBE_WAIT = 2;  // One cycle of delay after set_stb
  localparam ST_WAIT_RESP   = 3;  // Waiting for rb_stb from the selected port

  reg [2:0] state = ST_IDLE;


  always @(posedge ctrlport_clk) begin
    if (ctrlport_rst) begin
      s_ctrlport_resp_ack  <= 0;
      set_stb              <= 0;
      state                <= ST_IDLE;
      s_ctrlport_resp_data <= 32'hX;
      set_addr_reg         <= 8'hX;
      rb_addr_reg          <= 8'hX;
      // NOTE(review): 8-bit literal assigned to a PORT_W-bit register
      port_num_reg         <= 8'hX;
      upper_word           <= 32'hX;
    end else begin
      // Default assignments
      // (these make the ack and set_stb single-cycle pulses unless a state
      // below overrides them in the same clock cycle)
      s_ctrlport_resp_ack <= 0;
      set_stb             <= 0;

      case (state)
        ST_IDLE : begin
          // Requests whose port index is >= NUM_PORTS are ignored: no strobe
          // is generated and no ack is returned by this block.
          if (s_ctrlport_req_rd && port_num < NUM_PORTS) begin
            // Handle register reads (read-back registers)
            if (msw_access) begin
              // Reading the upper word always returns the cached upper-word value
              // from the previous lower-word read.
              s_ctrlport_resp_ack  <= 1;
              s_ctrlport_resp_data <= upper_word;
            end else begin
              // Handle register reads (read-back registers)
              rb_addr_reg <= s_ctrlport_req_addr[10:3];

              // Read-back of a user register on settings bus is always
              // combined with a write to the SR_RB_ADDR address.
              set_addr_reg     <= SR_RB_ADDR;
              set_data_reg     <= 32'bX;    // CtrlPort has no data in this case
              set_time_reg     <= s_ctrlport_req_time;
              set_has_time_reg <= s_ctrlport_req_has_time;

              // Save which port the read is for so that we only watch for
              // acknowledgments from that port.
              port_num_reg <= port_num;

              if (USE_TIME) begin
                state <= ST_TIME_CHECK;
              end else begin
                set_stb[port_num] <= 1;
                state             <= ST_STROBE_WAIT;
              end
            end

          end else if (s_ctrlport_req_wr && port_num < NUM_PORTS) begin
            // Handle register writes (settings registers)
            // Address bit 2 is not examined here, so a write to N*8+4 targets
            // the same settings register as a write to N*8.
            set_addr_reg     <= s_ctrlport_req_addr[10:3];
            set_data_reg     <= s_ctrlport_req_data;
            set_time_reg     <= s_ctrlport_req_time;
            set_has_time_reg <= s_ctrlport_req_has_time;

            // Save which port the write is for so that we only watch for
            // acknowledgments from that port.
            port_num_reg <= port_num;

            if (USE_TIME) begin
              state <= ST_TIME_CHECK;
            end else begin
              set_stb[port_num] <= 1;
              state             <= ST_STROBE_WAIT;
            end
          end
        end

        ST_TIME_CHECK : begin
          // If the transaction is timed, wait until the time arrives before
          // starting. This state is only reachable if USE_TIME is true.
          if (set_has_time_reg) begin
            // ">=" means a request whose time has already passed is issued
            // immediately rather than rejected.
            if (timestamp >= set_time_reg) begin
              set_stb[port_num_reg] <= 1;
              state                 <= ST_STROBE_WAIT;
            end
          end else begin
            set_stb[port_num_reg] <= 1;
            state                 <= ST_STROBE_WAIT;
          end
        end

        ST_STROBE_WAIT : begin
          // Wait a cycle before checking for a response
          state <= ST_WAIT_RESP;
        end

        ST_WAIT_RESP : begin
          // Wait for read completion on settings bus, acknowledged by rb_stb.
          // The read-back data will be ignored by ctrlport if this is a write.
          // rb_data is sampled on every cycle spent in this state; the sample
          // taken in the cycle rb_stb is seen is the one presented with the ack.
          upper_word           <= rb_data[(64*port_num_reg + 32) +: 32];
          s_ctrlport_resp_data <= rb_data[(64*port_num_reg +  0) +: 32];
          if (rb_stb[port_num_reg] == 1) begin
            s_ctrlport_resp_ack <= 1;
            state               <= ST_IDLE;
          end
        end

      endcase
    end
  end


  genvar i;
  generate
    for (i = 0; i < NUM_PORTS; i = i+1) begin : gen_settings_bus
      // Drive all settings buses with the same values, except the strobe
      assign rb_addr      [ 8*i +:  8] = rb_addr_reg;
      assign set_data     [32*i +: 32] = set_data_reg;
      assign set_addr     [ 8*i +:  8] = set_addr_reg;
      assign set_time     [64*i +: 64] = set_time_reg;
      assign set_has_time [        i ] = set_has_time_reg;
    end
  endgenerate


endmodule
diff --git a/fpga/usrp3/lib/rfnoc/utils/noc_shell_generic_ctrlport_pyld_chdr.v b/fpga/usrp3/lib/rfnoc/utils/noc_shell_generic_ctrlport_pyld_chdr.v
new file mode 100644
index 000000000..3a0e7fea7
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/noc_shell_generic_ctrlport_pyld_chdr.v
@@ -0,0 +1,273 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: noc_shell_generic_ctrlport_pyld_chdr
// Description:
//   Structural wrapper that instantiates a backend_iface, a ctrlport_endpoint,
//   one chdr_to_axis_pyld_ctxt per CHDR input port and one
//   axis_pyld_ctxt_to_chdr per CHDR output port, and wires them to the
//   framework-side and client-side ports below. It contains no logic of its
//   own.
//
// Parameters:
//   NOC_ID, NUM_DATA_I, NUM_DATA_O, CTRL_FIFOSIZE and MTU are passed to
//   backend_iface. THIS_PORTID, CTRLPORT_SLV_EN and CTRL_FIFOSIZE are passed
//   to ctrlport_endpoint. CHDR_W, ITEM_W, NIPC, CTXT_FIFOSIZE and
//   PYLD_FIFOSIZE are passed to every data-path converter; MTU is passed
//   only to the output converters.
//
// Signals:
//   See the port-group comments in the port list.

module noc_shell_generic_ctrlport_pyld_chdr #(
  parameter [31:0] NOC_ID          = 32'h0,
  parameter  [9:0] THIS_PORTID     = 10'd0,
  parameter        CHDR_W          = 64,
  parameter  [5:0] CTRL_FIFOSIZE   = 0,
  parameter  [0:0] CTRLPORT_SLV_EN = 1,
  parameter  [5:0] NUM_DATA_I      = 0,
  parameter  [5:0] NUM_DATA_O      = 0,
  parameter        ITEM_W          = 32,
  parameter        NIPC            = 2,
  parameter  [5:0] MTU             = 0,
  parameter        CTXT_FIFOSIZE   = 1,
  parameter        PYLD_FIFOSIZE   = 1
)(
  // Framework Interface
  //------------------------------------------------------------
  // RFNoC Framework Clocks and Resets
  input  wire                                 rfnoc_chdr_clk,
  output wire                                 rfnoc_chdr_rst,
  input  wire                                 rfnoc_ctrl_clk,
  output wire                                 rfnoc_ctrl_rst,
  // RFNoC Backend Interface
  input  wire [511:0]                         rfnoc_core_config,
  output wire [511:0]                         rfnoc_core_status,
  // CHDR Input Ports (from framework)
  input  wire [(CHDR_W*NUM_DATA_I)-1:0]       s_rfnoc_chdr_tdata,
  input  wire [NUM_DATA_I-1:0]                s_rfnoc_chdr_tlast,
  input  wire [NUM_DATA_I-1:0]                s_rfnoc_chdr_tvalid,
  output wire [NUM_DATA_I-1:0]                s_rfnoc_chdr_tready,
  // CHDR Output Ports (to framework)
  output wire [(CHDR_W*NUM_DATA_O)-1:0]       m_rfnoc_chdr_tdata,
  output wire [NUM_DATA_O-1:0]                m_rfnoc_chdr_tlast,
  output wire [NUM_DATA_O-1:0]                m_rfnoc_chdr_tvalid,
  input  wire [NUM_DATA_O-1:0]                m_rfnoc_chdr_tready,
  // AXIS-Ctrl Input Port (from framework)
  input  wire [31:0]                          s_rfnoc_ctrl_tdata,
  input  wire                                 s_rfnoc_ctrl_tlast,
  input  wire                                 s_rfnoc_ctrl_tvalid,
  output wire                                 s_rfnoc_ctrl_tready,
  // AXIS-Ctrl Output Port (to framework)
  output wire [31:0]                          m_rfnoc_ctrl_tdata,
  output wire                                 m_rfnoc_ctrl_tlast,
  output wire                                 m_rfnoc_ctrl_tvalid,
  input  wire                                 m_rfnoc_ctrl_tready,

  // Client Interface
  //------------------------------------------------------------
  // Control Port Master (Request)
  output wire                                 m_ctrlport_req_wr,
  output wire                                 m_ctrlport_req_rd,
  output wire [19:0]                          m_ctrlport_req_addr,
  output wire [31:0]                          m_ctrlport_req_data,
  output wire [3:0]                           m_ctrlport_req_byte_en,
  output wire                                 m_ctrlport_req_has_time,
  output wire [63:0]                          m_ctrlport_req_time,
  // Control Port Master (Response)
  input  wire                                 m_ctrlport_resp_ack,
  input  wire [1:0]                           m_ctrlport_resp_status,
  input  wire [31:0]                          m_ctrlport_resp_data,
  // Control Port Slave (Request)
  input  wire                                 s_ctrlport_req_wr,
  input  wire                                 s_ctrlport_req_rd,
  input  wire [19:0]                          s_ctrlport_req_addr,
  input  wire [9:0]                           s_ctrlport_req_portid,
  input  wire [15:0]                          s_ctrlport_req_rem_epid,
  input  wire [9:0]                           s_ctrlport_req_rem_portid,
  input  wire [31:0]                          s_ctrlport_req_data,
  input  wire [3:0]                           s_ctrlport_req_byte_en,
  input  wire                                 s_ctrlport_req_has_time,
  input  wire [63:0]                          s_ctrlport_req_time,
  // Control Port Slave (Response)
  output wire                                 s_ctrlport_resp_ack,
  output wire [1:0]                           s_ctrlport_resp_status,
  output wire [31:0]                          s_ctrlport_resp_data,
  // Payload stream out (to user logic)
  output wire [(ITEM_W*NIPC*NUM_DATA_I)-1:0]  m_axis_payload_tdata,
  output wire [(NIPC*NUM_DATA_I)-1:0]         m_axis_payload_tkeep,
  output wire [NUM_DATA_I-1:0]                m_axis_payload_tlast,
  output wire [NUM_DATA_I-1:0]                m_axis_payload_tvalid,
  input  wire [NUM_DATA_I-1:0]                m_axis_payload_tready,
  // Context stream out (to user logic)
  output wire [(CHDR_W*NUM_DATA_I)-1:0]       m_axis_context_tdata,
  output wire [(4*NUM_DATA_I)-1:0]            m_axis_context_tuser,
  output wire [NUM_DATA_I-1:0]                m_axis_context_tlast,
  output wire [NUM_DATA_I-1:0]                m_axis_context_tvalid,
  input  wire [NUM_DATA_I-1:0]                m_axis_context_tready,
  // Payload stream in (from user logic)
  input  wire [(ITEM_W*NIPC*NUM_DATA_O)-1:0]  s_axis_payload_tdata,
  input  wire [(NIPC*NUM_DATA_O)-1:0]         s_axis_payload_tkeep,
  input  wire [NUM_DATA_O-1:0]                s_axis_payload_tlast,
  input  wire [NUM_DATA_O-1:0]                s_axis_payload_tvalid,
  output wire [NUM_DATA_O-1:0]                s_axis_payload_tready,
  // Context stream in (from user logic)
  input  wire [(CHDR_W*NUM_DATA_O)-1:0]       s_axis_context_tdata,
  input  wire [(4*NUM_DATA_O)-1:0]            s_axis_context_tuser,
  input  wire [NUM_DATA_O-1:0]                s_axis_context_tlast,
  input  wire [NUM_DATA_O-1:0]                s_axis_context_tvalid,
  output wire [NUM_DATA_O-1:0]                s_axis_context_tready
);

  // ---------------------------------------------------
  //  Backend Interface
  // ---------------------------------------------------
  // The flush status vectors are 64 bits wide; only bits [NUM_DATA_I-1:0] and
  // [NUM_DATA_O-1:0] are driven by the data-path instances further down.
  wire         data_i_flush_en;
  wire [31:0]  data_i_flush_timeout;
  wire [63:0]  data_i_flush_active;
  wire [63:0]  data_i_flush_done;
  wire         data_o_flush_en;
  wire [31:0]  data_o_flush_timeout;
  wire [63:0]  data_o_flush_active;
  wire [63:0]  data_o_flush_done;

  backend_iface #(
    .NOC_ID        (NOC_ID       ),
    .NUM_DATA_I    (NUM_DATA_I   ),
    .NUM_DATA_O    (NUM_DATA_O   ),
    .CTRL_FIFOSIZE (CTRL_FIFOSIZE),
    .MTU           (MTU          )
  ) backend_iface_i (
    .rfnoc_chdr_clk       (rfnoc_chdr_clk      ),
    .rfnoc_ctrl_clk       (rfnoc_ctrl_clk      ),
    .rfnoc_core_config    (rfnoc_core_config   ),
    .rfnoc_core_status    (rfnoc_core_status   ),
    .rfnoc_chdr_rst       (rfnoc_chdr_rst      ),
    .rfnoc_ctrl_rst       (rfnoc_ctrl_rst      ),
    .data_i_flush_en      (data_i_flush_en     ),
    .data_i_flush_timeout (data_i_flush_timeout),
    .data_i_flush_active  (data_i_flush_active ),
    .data_i_flush_done    (data_i_flush_done   ),
    .data_o_flush_en      (data_o_flush_en     ),
    .data_o_flush_timeout (data_o_flush_timeout),
    .data_o_flush_active  (data_o_flush_active ),
    .data_o_flush_done    (data_o_flush_done   )
  );

  // ---------------------------------------------------
  //  Control Path
  // ---------------------------------------------------
  // The endpoint's ctrlport side is clocked and reset from the CHDR domain
  // (rfnoc_chdr_clk / rfnoc_chdr_rst); SYNC_CLKS is 0.

  ctrlport_endpoint #(
    .THIS_PORTID      (THIS_PORTID    ),
    .SYNC_CLKS        (0              ),
    .AXIS_CTRL_MST_EN (CTRLPORT_SLV_EN),
    .AXIS_CTRL_SLV_EN (1              ),
    .SLAVE_FIFO_SIZE  (CTRL_FIFOSIZE  )
  ) ctrlport_ep_i (
    .rfnoc_ctrl_clk           (rfnoc_ctrl_clk           ),
    .rfnoc_ctrl_rst           (rfnoc_ctrl_rst           ),
    .ctrlport_clk             (rfnoc_chdr_clk           ),
    .ctrlport_rst             (rfnoc_chdr_rst           ),
    .s_rfnoc_ctrl_tdata       (s_rfnoc_ctrl_tdata       ),
    .s_rfnoc_ctrl_tlast       (s_rfnoc_ctrl_tlast       ),
    .s_rfnoc_ctrl_tvalid      (s_rfnoc_ctrl_tvalid      ),
    .s_rfnoc_ctrl_tready      (s_rfnoc_ctrl_tready      ),
    .m_rfnoc_ctrl_tdata       (m_rfnoc_ctrl_tdata       ),
    .m_rfnoc_ctrl_tlast       (m_rfnoc_ctrl_tlast       ),
    .m_rfnoc_ctrl_tvalid      (m_rfnoc_ctrl_tvalid      ),
    .m_rfnoc_ctrl_tready      (m_rfnoc_ctrl_tready      ),
    .m_ctrlport_req_wr        (m_ctrlport_req_wr        ),
    .m_ctrlport_req_rd        (m_ctrlport_req_rd        ),
    .m_ctrlport_req_addr      (m_ctrlport_req_addr      ),
    .m_ctrlport_req_data      (m_ctrlport_req_data      ),
    .m_ctrlport_req_byte_en   (m_ctrlport_req_byte_en   ),
    .m_ctrlport_req_has_time  (m_ctrlport_req_has_time  ),
    .m_ctrlport_req_time      (m_ctrlport_req_time      ),
    .m_ctrlport_resp_ack      (m_ctrlport_resp_ack      ),
    .m_ctrlport_resp_status   (m_ctrlport_resp_status   ),
    .m_ctrlport_resp_data     (m_ctrlport_resp_data     ),
    .s_ctrlport_req_wr        (s_ctrlport_req_wr        ),
    .s_ctrlport_req_rd        (s_ctrlport_req_rd        ),
    .s_ctrlport_req_addr      (s_ctrlport_req_addr      ),
    .s_ctrlport_req_portid    (s_ctrlport_req_portid    ),
    .s_ctrlport_req_rem_epid  (s_ctrlport_req_rem_epid  ),
    .s_ctrlport_req_rem_portid(s_ctrlport_req_rem_portid),
    .s_ctrlport_req_data      (s_ctrlport_req_data      ),
    .s_ctrlport_req_byte_en   (s_ctrlport_req_byte_en   ),
    .s_ctrlport_req_has_time  (s_ctrlport_req_has_time  ),
    .s_ctrlport_req_time      (s_ctrlport_req_time      ),
    .s_ctrlport_resp_ack      (s_ctrlport_resp_ack      ),
    .s_ctrlport_resp_status   (s_ctrlport_resp_status   ),
    .s_ctrlport_resp_data     (s_ctrlport_resp_data     )
  );

  // ---------------------------------------------------
  //  Data Path
  // ---------------------------------------------------
  // Both sides of every converter use rfnoc_chdr_clk / rfnoc_chdr_rst, so the
  // user-logic payload and context streams are in the CHDR clock domain.

  genvar i;
  generate
    // One CHDR -> payload/context converter per input port
    for (i = 0; i < NUM_DATA_I; i = i + 1) begin: in
      chdr_to_axis_pyld_ctxt #(
        .CHDR_W              (CHDR_W       ),
        .ITEM_W              (ITEM_W       ),
        .NIPC                (NIPC         ),
        .SYNC_CLKS           (0            ),
        .CONTEXT_FIFO_SIZE   (CTXT_FIFOSIZE),
        .PAYLOAD_FIFO_SIZE   (PYLD_FIFOSIZE),
        .CONTEXT_PREFETCH_EN (1            )
      ) chdr2raw_i (
        .axis_chdr_clk        (rfnoc_chdr_clk                                       ),
        .axis_chdr_rst        (rfnoc_chdr_rst                                       ),
        .axis_data_clk        (rfnoc_chdr_clk                                       ),
        .axis_data_rst        (rfnoc_chdr_rst                                       ),
        .s_axis_chdr_tdata    (s_rfnoc_chdr_tdata   [(i*CHDR_W)+:CHDR_W]            ),
        .s_axis_chdr_tlast    (s_rfnoc_chdr_tlast   [i]                             ),
        .s_axis_chdr_tvalid   (s_rfnoc_chdr_tvalid  [i]                             ),
        .s_axis_chdr_tready   (s_rfnoc_chdr_tready  [i]                             ),
        .m_axis_payload_tdata (m_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]),
        .m_axis_payload_tkeep (m_axis_payload_tkeep [(i*NIPC)+:NIPC]                ),
        .m_axis_payload_tlast (m_axis_payload_tlast [i]                             ),
        .m_axis_payload_tvalid(m_axis_payload_tvalid[i]                             ),
        .m_axis_payload_tready(m_axis_payload_tready[i]                             ),
        .m_axis_context_tdata (m_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)]          ),
        .m_axis_context_tuser (m_axis_context_tuser [(i*4)+:4]                      ),
        .m_axis_context_tlast (m_axis_context_tlast [i]                             ),
        .m_axis_context_tvalid(m_axis_context_tvalid[i]                             ),
        .m_axis_context_tready(m_axis_context_tready[i]                             ),
        .flush_en             (data_i_flush_en                                      ),
        .flush_timeout        (data_i_flush_timeout                                 ),
        .flush_active         (data_i_flush_active  [i]                             ),
        .flush_done           (data_i_flush_done    [i]                             )
      );
    end

    // One payload/context -> CHDR converter per output port
    for (i = 0; i < NUM_DATA_O; i = i + 1) begin: out
      axis_pyld_ctxt_to_chdr #(
        .CHDR_W              (CHDR_W       ),
        .ITEM_W              (ITEM_W       ),
        .NIPC                (NIPC         ),
        .SYNC_CLKS           (0            ),
        .CONTEXT_FIFO_SIZE   (CTXT_FIFOSIZE),
        .PAYLOAD_FIFO_SIZE   (PYLD_FIFOSIZE),
        .CONTEXT_PREFETCH_EN (1            ),
        .MTU                 (MTU          )
      ) raw2chdr_i (
        .axis_chdr_clk        (rfnoc_chdr_clk                                       ),
        .axis_chdr_rst        (rfnoc_chdr_rst                                       ),
        .axis_data_clk        (rfnoc_chdr_clk                                       ),
        .axis_data_rst        (rfnoc_chdr_rst                                       ),
        .m_axis_chdr_tdata    (m_rfnoc_chdr_tdata   [(i*CHDR_W)+:CHDR_W]            ),
        .m_axis_chdr_tlast    (m_rfnoc_chdr_tlast   [i]                             ),
        .m_axis_chdr_tvalid   (m_rfnoc_chdr_tvalid  [i]                             ),
        .m_axis_chdr_tready   (m_rfnoc_chdr_tready  [i]                             ),
        .s_axis_payload_tdata (s_axis_payload_tdata [(i*ITEM_W*NIPC)+:(ITEM_W*NIPC)]),
        .s_axis_payload_tkeep (s_axis_payload_tkeep [(i*NIPC)+:NIPC]                ),
        .s_axis_payload_tlast (s_axis_payload_tlast [i]                             ),
        .s_axis_payload_tvalid(s_axis_payload_tvalid[i]                             ),
        .s_axis_payload_tready(s_axis_payload_tready[i]                             ),
        .s_axis_context_tdata (s_axis_context_tdata [(i*CHDR_W)+:(CHDR_W)]          ),
        .s_axis_context_tuser (s_axis_context_tuser [(i*4)+:4]                      ),
        .s_axis_context_tlast (s_axis_context_tlast [i]                             ),
        .s_axis_context_tvalid(s_axis_context_tvalid[i]                             ),
        .s_axis_context_tready(s_axis_context_tready[i]                             ),
        .framer_errors        (                                                     ),
        .flush_en             (data_o_flush_en                                      ),
        .flush_timeout        (data_o_flush_timeout                                 ),
        .flush_active         (data_o_flush_active  [i]                             ),
        .flush_done           (data_o_flush_done    [i]                             )
      );
    end
  endgenerate

endmodule // noc_shell_generic_ctrlport_pyld_chdr
diff --git a/fpga/usrp3/lib/rfnoc/utils/timekeeper.v b/fpga/usrp3/lib/rfnoc/utils/timekeeper.v
new file mode 100644
index 000000000..404f45758
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/utils/timekeeper.v
@@ -0,0 +1,279 @@
//
// Copyright 2019 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: timekeeper
//
// Description:  Timekeeper for RFNoC blocks. This block contains a 64-bit
// counter to represent the current time in terms of sample clock cycles.
// The counter can be updated and synchronized using the pps input.
//
// WARNING: All registers larger than a single 32-bit word should be read and
// written least significant word first to guarantee coherency.
//
// Parameters:
//
//   BASE_ADDR      : Added to each register offset to form the address that
//                    is compared against the full 20-bit request address.
//   TIME_INCREMENT : Amount added to the time on each sample_rx_stb cycle.
//

module timekeeper #(
  parameter BASE_ADDR      = 'h00,
  parameter TIME_INCREMENT = 1     // Amount by which to increment time on each sample clock cycle
) (
  input wire tb_clk,    // Time-base clock
  input wire tb_rst,    // Time-base reset in tb_clk domain

  //---------------------------------------------------------------------------
  // Control Interface
  //---------------------------------------------------------------------------

  input  wire        s_ctrlport_clk,
  input  wire        s_ctrlport_req_wr,
  input  wire        s_ctrlport_req_rd,
  input  wire [19:0] s_ctrlport_req_addr,
  input  wire [31:0] s_ctrlport_req_data,
  output wire        s_ctrlport_resp_ack,
  output wire [31:0] s_ctrlport_resp_data,

  //---------------------------------------------------------------------------
  // Time
  //---------------------------------------------------------------------------

  input  wire        sample_rx_stb,         // Sample Rx strobe (data valid indicator)
  input  wire        pps,                   // Pulse per second input
  output reg  [63:0] tb_timestamp,          // 64-bit global timestamp synchronous to tb_clk
  output reg  [63:0] tb_timestamp_last_pps, // 64-bit timestamp captured at the last detected PPS edge, synchronous to tb_clk
  output reg  [63:0] tb_period_ns_q32       // Time Period of time-base in nanoseconds
);

  //---------------------------------------------------------------------------
  // Register Logic
  //---------------------------------------------------------------------------

  reg        set_time_pps;        // REG_TIME_CTRL: load time at the next PPS edge
  reg        set_time_now;        // REG_TIME_CTRL: load time immediately
  reg        new_time_ctrl;       // One-cycle pulse after a write to REG_TIME_CTRL
  reg [63:0] time_at_next_event;  // Time to load at next timed event

  reg [31:0] tb_timestamp_hi;           // Holding register for reading tb_timestamp
  reg [31:0] time_at_next_event_lo;     // Holding register for writing time_at_next_event
  reg [31:0] time_at_next_event_hi;     // Holding register for reading time_at_next_event
  reg [31:0] tb_timestamp_last_pps_hi;  // Holding register for reading tb_timestamp_last_pps

  // CtrlPort request/response after crossing into the tb_clk domain
  wire        s_ctrlport_req_wr_tb;
  wire        s_ctrlport_req_rd_tb;
  wire [19:0] s_ctrlport_req_addr_tb;
  wire [31:0] s_ctrlport_req_data_tb;
  reg         s_ctrlport_resp_ack_tb;
  reg  [31:0] s_ctrlport_resp_data_tb;

  // Clock crossing from ctrlport_clk to tb_clk domain
  // (ports this module does not use are left unconnected)

  ctrlport_clk_cross ctrlport_clk_cross_tb_i (
    .rst                       (tb_rst),
    .s_ctrlport_clk            (s_ctrlport_clk),
    .s_ctrlport_req_wr         (s_ctrlport_req_wr),
    .s_ctrlport_req_rd         (s_ctrlport_req_rd),
    .s_ctrlport_req_addr       (s_ctrlport_req_addr),
    .s_ctrlport_req_portid     (),
    .s_ctrlport_req_rem_epid   (),
    .s_ctrlport_req_rem_portid (),
    .s_ctrlport_req_data       (s_ctrlport_req_data),
    .s_ctrlport_req_byte_en    (),
    .s_ctrlport_req_has_time   (),
    .s_ctrlport_req_time       (),
    .s_ctrlport_resp_ack       (s_ctrlport_resp_ack),
    .s_ctrlport_resp_status    (),
    .s_ctrlport_resp_data      (s_ctrlport_resp_data),
    .m_ctrlport_clk            (tb_clk),
    .m_ctrlport_req_wr         (s_ctrlport_req_wr_tb),
    .m_ctrlport_req_rd         (s_ctrlport_req_rd_tb),
    .m_ctrlport_req_addr       (s_ctrlport_req_addr_tb),
    .m_ctrlport_req_portid     (),
    .m_ctrlport_req_rem_epid   (),
    .m_ctrlport_req_rem_portid (),
    .m_ctrlport_req_data       (s_ctrlport_req_data_tb),
    .m_ctrlport_req_byte_en    (),
    .m_ctrlport_req_has_time   (),
    .m_ctrlport_req_time       (),
    .m_ctrlport_resp_ack       (s_ctrlport_resp_ack_tb),
    .m_ctrlport_resp_status    (),
    .m_ctrlport_resp_data      (s_ctrlport_resp_data_tb)
  );

  //---------------------------------------------------------------------------
  // Timekeeper Register Offsets
  //---------------------------------------------------------------------------

  localparam REG_TIME_NOW_LO         = 'h00;  // Current time count (low word)
  localparam REG_TIME_NOW_HI         = 'h04;  // Current time count (high word)
  localparam REG_TIME_EVENT_LO       = 'h08;  // Time for next event (low word)
  localparam REG_TIME_EVENT_HI       = 'h0C;  // Time for next event (high word)
  localparam REG_TIME_CTRL           = 'h10;  // Time control word
  localparam REG_TIME_LAST_PPS_LO    = 'h14;  // Time of last PPS pulse edge (low word)
  localparam REG_TIME_LAST_PPS_HI    = 'h18;  // Time of last PPS pulse edge (high word)
  localparam REG_TIME_BASE_PERIOD_LO = 'h1C;  // Time Period in nanoseconds (low word)
  localparam REG_TIME_BASE_PERIOD_HI = 'h20;  // Time Period in nanoseconds (high word)

  // REG_TIME_CTRL bit fields
  localparam TIME_NOW_POS = 0;
  localparam TIME_PPS_POS = 1;

  // Register file. Requests to addresses that match none of the case items
  // below are not acknowledged by this module. time_at_next_event,
  // tb_period_ns_q32 and the holding registers are not cleared by reset.
  always @(posedge tb_clk) begin
    if (tb_rst) begin
      s_ctrlport_resp_ack_tb  <= 0;
      s_ctrlport_resp_data_tb <= 0;
      new_time_ctrl           <= 0;
      set_time_pps            <= 0;
      set_time_now            <= 0;
    end else begin
      // Default assignments
      s_ctrlport_resp_ack_tb  <= 0;
      s_ctrlport_resp_data_tb <= 0;
      new_time_ctrl           <= 0;

      // Handle register writes
      if (s_ctrlport_req_wr_tb) begin
        case (s_ctrlport_req_addr_tb)
          BASE_ADDR + REG_TIME_EVENT_LO: begin
            // Low word is only staged here; it takes effect with the HI write
            time_at_next_event_lo  <= s_ctrlport_req_data_tb;
            s_ctrlport_resp_ack_tb <= 1;
          end
          BASE_ADDR + REG_TIME_EVENT_HI: begin
            // Commit all 64 bits at once using the previously staged low word
            time_at_next_event[31: 0] <= time_at_next_event_lo;
            time_at_next_event[63:32] <= s_ctrlport_req_data_tb;
            s_ctrlport_resp_ack_tb    <= 1;
          end
          BASE_ADDR + REG_TIME_CTRL: begin
            set_time_pps           <= s_ctrlport_req_data_tb[TIME_PPS_POS];
            set_time_now           <= s_ctrlport_req_data_tb[TIME_NOW_POS];
            new_time_ctrl          <= 1;
            s_ctrlport_resp_ack_tb <= 1;
          end
          BASE_ADDR + REG_TIME_BASE_PERIOD_LO: begin
            // The period words are written directly (no holding register)
            tb_period_ns_q32[31:0] <= s_ctrlport_req_data_tb;
            s_ctrlport_resp_ack_tb <= 1;
          end
          BASE_ADDR + REG_TIME_BASE_PERIOD_HI: begin
            tb_period_ns_q32[63:32] <= s_ctrlport_req_data_tb;
            s_ctrlport_resp_ack_tb  <= 1;
          end
        endcase
      end

      // Handle register reads
      if (s_ctrlport_req_rd_tb) begin
        case (s_ctrlport_req_addr_tb)
          BASE_ADDR + REG_TIME_NOW_LO: begin
            // Reading the low word snapshots the high word for a later HI read
            s_ctrlport_resp_data_tb <= tb_timestamp[31:0];
            tb_timestamp_hi         <= tb_timestamp[63:32];
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_NOW_HI: begin
            s_ctrlport_resp_data_tb <= tb_timestamp_hi;
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_EVENT_LO: begin
            s_ctrlport_resp_data_tb <= time_at_next_event[31:0];
            time_at_next_event_hi   <= time_at_next_event[63:32];
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_EVENT_HI: begin
            s_ctrlport_resp_data_tb <= time_at_next_event_hi;
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_CTRL: begin
            // Clear the whole word first, then override the two defined bits
            // (the later non-blocking assignments win for those bits)
            s_ctrlport_resp_data_tb               <= 0;
            s_ctrlport_resp_data_tb[TIME_PPS_POS] <= set_time_pps;
            s_ctrlport_resp_data_tb[TIME_NOW_POS] <= set_time_now;
            s_ctrlport_resp_ack_tb                <= 1;
          end
          BASE_ADDR + REG_TIME_LAST_PPS_LO: begin
            s_ctrlport_resp_data_tb  <= tb_timestamp_last_pps[31:0];
            tb_timestamp_last_pps_hi <= tb_timestamp_last_pps[63:32];
            s_ctrlport_resp_ack_tb   <= 1;
          end
          BASE_ADDR + REG_TIME_LAST_PPS_HI: begin
            s_ctrlport_resp_data_tb <= tb_timestamp_last_pps_hi;
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_BASE_PERIOD_LO: begin
            s_ctrlport_resp_data_tb <= tb_period_ns_q32[31:0];
            s_ctrlport_resp_ack_tb  <= 1;
          end
          BASE_ADDR + REG_TIME_BASE_PERIOD_HI: begin
            s_ctrlport_resp_data_tb <= tb_period_ns_q32[63:32];
            s_ctrlport_resp_ack_tb  <= 1;
          end
        endcase
      end
    end
  end


  //---------------------------------------------------------------------------
  // Pulse Per Second
  //---------------------------------------------------------------------------

  reg pps_del;   // pps delayed by one tb_clk cycle
  reg pps_edge;  // One-cycle pulse marking a detected pps transition

  // pps_edge is set when pps was 1 on the previous cycle and is 0 now, i.e. on
  // the 1 -> 0 transition of pps as sampled by tb_clk.
  // NOTE(review): confirm that the falling edge is the intended PPS reference.
  always @(posedge tb_clk) begin
    if (tb_rst) begin
      pps_del <= 0;
      pps_edge <= 0;
    end else begin
      pps_del <= pps;
      pps_edge<= pps_del & ~pps;
    end
  end


  //---------------------------------------------------------------------------
  // Time Tracker
  //---------------------------------------------------------------------------

  reg time_event_armed;  // Boolean to indicate if we're expecting a timed event

  // Fires once per write to REG_TIME_CTRL: immediately if set_time_now is set,
  // or at the next detected PPS edge if set_time_pps is set.
  wire time_event =
    time_event_armed && (
      set_time_now || (set_time_pps && pps_edge)
    );

  always @(posedge tb_clk) begin
    if (tb_rst) begin
      tb_timestamp     <= 0;
      time_event_armed <= 0;
    end else begin
      if (time_event) begin
        // Load the timing info configured prior to the event
        time_event_armed <= 0;
        tb_timestamp     <= time_at_next_event;
      end else if (sample_rx_stb) begin
        // Update time for each sample word received
        tb_timestamp <= tb_timestamp + TIME_INCREMENT;
      end

      if (new_time_ctrl) begin
        // Indicate that we're expecting a timed event because the time control
        // register was updated.
        // (this assignment comes last, so it overrides the disarm above when
        // both happen in the same cycle)
        time_event_armed <= 1;
      end
    end
  end


  //---------------------------------------------------------------------------
  // PPS Tracker
  //---------------------------------------------------------------------------

  // Record the time at each detected PPS edge: the newly loaded time if a time
  // event fires in the same cycle, otherwise the current time plus one
  // increment.
  always @(posedge tb_clk) begin
    if (tb_rst) begin
      tb_timestamp_last_pps <= 64'h0;
    end else if (pps_edge) begin
      if (time_event) begin
        tb_timestamp_last_pps <= time_at_next_event;
      end else begin
        tb_timestamp_last_pps <= tb_timestamp + TIME_INCREMENT;
      end
    end
  end

endmodule
diff --git a/fpga/usrp3/lib/rfnoc/vector_iir.v b/fpga/usrp3/lib/rfnoc/vector_iir.v
new file mode 100644
index 000000000..a875f34fe
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/vector_iir.v
@@ -0,0 +1,187 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: vector_iir
// Description:
//   This module implements an IIR filter with a variable length delay line.
//   Transfer Function:                 beta
//                        H(z) = ------------------
//                               1 - alpha*z^-delay
//   where:
//   - beta is the feedforward tap
//   - alpha is the feedback tap
//   - delay (aka vector_len) is the feedback tap delay
//
// Parameters:
//   - MAX_VECTOR_LEN: Maximum value for delay (vector_len)
//   - IN_W: Input sample width for a real sample which includes the sign bit.
//           The actual input of the module will be 2*IN_W because it handles
//           complex data.
+// - OUT_W: Output sample width for a real sample which includes the sign bit. +// The actual output of the module will be 2*OUT_W because it handles +// complex data. +// - ALPHA_W: Width of the alpha parameter (signed) +// - BETA_W: Width of the beta parameter (signed) +// - FEEDBACK_W: Number of bits in the feedback delay line (optimal = 25) +// - ACCUM_HEADROOM: Number of bits of headroom in the feedback accumulator +// Signals: +// - i_* : Input sample stream (AXI-Stream) +// - o_* : Output sample stream (AXI-Stream) +// - set_*: Static settings +// + +module vector_iir #( + parameter MAX_VECTOR_LEN = 1024, + parameter IN_W = 16, + parameter OUT_W = 16, + parameter ALPHA_W = 16, + parameter BETA_W = 16, + parameter FEEDBACK_W = 25, + parameter ACCUM_HEADROOM = 4 +)( + input wire clk, + input wire reset, + input wire [$clog2(MAX_VECTOR_LEN)-1:0] set_vector_len, + input wire [BETA_W-1:0] set_beta, + input wire [ALPHA_W-1:0] set_alpha, + input wire [IN_W*2-1:0] i_tdata, + input wire i_tlast, + input wire i_tvalid, + output wire i_tready, + output wire [OUT_W*2-1:0] o_tdata, + output wire o_tlast, + output wire o_tvalid, + input wire o_tready +); + + // There are four registers between the input and output + // - Input pipeline (in_X_reg) + // - Feedforward product (ff_prod_X_reg) + // - Feedback sum (fb_sum_X_reg) + // - Output pipeline (dsp_data_out) + localparam IN_TO_OUT_LATENCY = 4; + + // The feedback path has 4 cycles of delay + // - Feedback sum (fb_sum_X_reg) + // - variable_delay_line (2 cyc) + // - Scaled feedback (fb_sum_scaled_X_reg) + localparam MIN_FB_DELAY = 4; + + // Pipeline settings for timing + reg [$clog2(MAX_VECTOR_LEN)-1:0] reg_fb_delay; + reg signed [BETA_W-1:0] reg_beta; + reg signed [ALPHA_W-1:0] reg_alpha; + + always @(posedge clk) begin + reg_fb_delay <= set_vector_len - MIN_FB_DELAY - 1; //Adjust for pipeline delay + reg_beta <= set_beta; + reg_alpha <= set_alpha; + end + + //-----------------------------------------------------------
+ // AXI-Stream wrapper + //----------------------------------------------------------- + wire [(IN_W*2)-1:0] dsp_data_in; + reg [(OUT_W*2)-1:0] dsp_data_out = 0; + wire [IN_TO_OUT_LATENCY-1:0] chain_en; + + // We are implementing an N-cycle DSP operation without AXI-Stream handshaking. + // Use an axis_shift_register and the associated strobes to drive clock enables + // on the DSP regs to ensure that data/valid/last sync up. + axis_shift_register #( + .WIDTH(IN_W*2), .NSPC(1), .LATENCY(IN_TO_OUT_LATENCY), + .SIDEBAND_DATAPATH(1), .PIPELINE("NONE") + ) axis_shreg_i ( + .clk(clk), .reset(reset), + .s_axis_tdata(i_tdata), .s_axis_tkeep(1'b1), .s_axis_tlast(i_tlast), + .s_axis_tvalid(i_tvalid), .s_axis_tready(i_tready), + .m_axis_tdata(o_tdata), .m_axis_tkeep(), .m_axis_tlast(o_tlast), + .m_axis_tvalid(o_tvalid), .m_axis_tready(o_tready), + .stage_stb(chain_en), .stage_eop(), + .m_sideband_data(dsp_data_in), .m_sideband_keep(), + .s_sideband_data(dsp_data_out) + ); + + //----------------------------------------------------------- + // DSP datapath + //----------------------------------------------------------- + localparam FF_PROD_W = IN_W + BETA_W - 1; + localparam FB_PROD_W = FEEDBACK_W + ALPHA_W - 1; + + reg signed [IN_W-1:0] in_i_reg = 0, in_q_reg = 0; + reg signed [FF_PROD_W-1:0] ff_prod_i_reg = 0, ff_prod_q_reg = 0; + reg signed [FB_PROD_W-1:0] fb_sum_i_reg = 0, fb_sum_q_reg = 0; + wire signed [FB_PROD_W-1:0] fb_trunc_i, fb_trunc_q; + wire signed [FEEDBACK_W-1:0] fb_sum_del_i, fb_sum_del_q; + reg signed [FB_PROD_W-1:0] fb_sum_scaled_i_reg = 0, fb_sum_scaled_q_reg = 0; + wire signed [OUT_W-1:0] out_i_rnd, out_q_rnd; + + always @(posedge clk) begin + if (reset) begin + {in_i_reg, in_q_reg} <= 0; + ff_prod_i_reg <= 0; + ff_prod_q_reg <= 0; + fb_sum_i_reg <= 0; + fb_sum_q_reg <= 0; + fb_sum_scaled_i_reg <= 0; + fb_sum_scaled_q_reg <= 0; + dsp_data_out <= 0; + end else begin + if (chain_en[0]) begin + // Input pipeline register + {in_i_reg, in_q_reg} <= dsp_data_in; 
+ end + if (chain_en[1]) begin + // Feedforward product (x[n] * beta) + ff_prod_i_reg <= in_i_reg * reg_beta; + ff_prod_q_reg <= in_q_reg * reg_beta; + // Compute scaled, delayed feedback (y[n-D] * alpha) + fb_sum_scaled_i_reg <= fb_sum_del_i * reg_alpha; + fb_sum_scaled_q_reg <= fb_sum_del_q * reg_alpha; + end + if (chain_en[2]) begin + // Sum of feedforward product and scaled, delayed feedback + // y[n] = (alpha * y[n-D]) + (x[n] * beta) + fb_sum_i_reg <= fb_sum_scaled_i_reg + (ff_prod_i_reg <<< (FB_PROD_W - FF_PROD_W - ACCUM_HEADROOM)); + fb_sum_q_reg <= fb_sum_scaled_q_reg + (ff_prod_q_reg <<< (FB_PROD_W - FF_PROD_W - ACCUM_HEADROOM)); + end + if (chain_en[3]) begin + // Output pipeline register + dsp_data_out <= {out_i_rnd, out_q_rnd}; + end + end + end + + // Truncate feedback to the requested FEEDBACK_W + assign fb_trunc_i = (fb_sum_i_reg >>> (FB_PROD_W - FEEDBACK_W)); + assign fb_trunc_q = (fb_sum_q_reg >>> (FB_PROD_W - FEEDBACK_W)); + + // A variable delay line will be used to store the feedback + // This delay line stores "reg_fb_delay" worth of samples which + // allows each element in the vector to have its own independent state + variable_delay_line #( + .WIDTH(FEEDBACK_W * 2), .DEPTH(MAX_VECTOR_LEN - MIN_FB_DELAY), + .DYNAMIC_DELAY(1), .DEFAULT_DATA(0), .OUT_REG(1) + ) delay_line_inst ( + .clk(clk), .clk_en(chain_en[1]), .reset(reset), + .stb_in(1'b1), + .data_in({fb_trunc_i[FEEDBACK_W-1:0], fb_trunc_q[FEEDBACK_W-1:0]}), + .delay(reg_fb_delay), + .data_out({fb_sum_del_i, fb_sum_del_q}) + ); + + // Round the accumulator output to produce the final output + round #( + .bits_in(FB_PROD_W-ACCUM_HEADROOM), .bits_out(OUT_W) + ) out_round_i_inst ( + .in(fb_sum_i_reg[FB_PROD_W-ACCUM_HEADROOM-1:0]), .out(out_i_rnd), .err() + ); + round #( + .bits_in(FB_PROD_W-ACCUM_HEADROOM), .bits_out(OUT_W) + ) out_round_q_inst ( + .in(fb_sum_q_reg[FB_PROD_W-ACCUM_HEADROOM-1:0]), .out(out_q_rnd), .err() + ); + +endmodule // vector_iir diff --git
a/fpga/usrp3/lib/rfnoc/xport/Makefile.srcs b/fpga/usrp3/lib/rfnoc/xport/Makefile.srcs new file mode 100644 index 000000000..12582750b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/xport/Makefile.srcs @@ -0,0 +1,17 @@ +# +# Copyright 2018 Ettus Research, A National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# RFNoC Transport Sources +################################################## +RFNOC_XPORT_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/xport/, \ +chdr_xport_adapter_generic.v \ +eth_ipv4_chdr64_adapter.v \ +eth_ipv4_chdr64_dispatch.v \ +eth_interface.v \ +eth_internal.v \ +liberio_chdr64_adapter.v \ +)) diff --git a/fpga/usrp3/lib/rfnoc/xport/chdr_xport_adapter_generic.v b/fpga/usrp3/lib/rfnoc/xport/chdr_xport_adapter_generic.v new file mode 100644 index 000000000..6de298530 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/xport/chdr_xport_adapter_generic.v @@ -0,0 +1,397 @@ +// +// Copyright 2019 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xport_adapter_generic +// Description: A generic transport adapter module that can be used in +// a variety of transports. It does the following: +// - Exposes a configuration port for mgmt packets to configure the node +// - Implements a return-address map for packets with metadata other than +// the CHDR. Additional metadata can be passed as a tuser to this module +// which will store it in a map indexed by the SrcEPID in a management +// packet. For all returning packets, the metadata will be looked up in +// the map and attached as the outgoing tuser.
+// - Implements a loopback path for node-info discovery +// - Converts data stream to/from "RFNoC Network Order" (64-bit-Big-Endian) +// +// Parameters: +// - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>} +// - CHDR_W: Width of the CHDR bus in bits +// - USER_W: Width of the tuser bus in bits +// - TBL_SIZE: Log2 of the depth of the routing table +// - NODE_SUBTYPE: The node subtype to return for a node-info discovery +// - NODE_INST: The node instance number to return for a node-info discovery +// +// Signals: +// - device_id : The ID of the device that has instantiated this module +// - s_axis_xport_*: The input CHDR stream from the transport (plus tuser metadata) +// - m_axis_xport_*: The output CHDR stream to transport (plus tuser metadata) +// - s_axis_rfnoc_*: The input CHDR stream from the rfnoc infrastructure +// - m_axis_rfnoc_*: The output CHDR stream to the rfnoc infrastructure +// - ctrlport_* : The ctrlport interface for the configuration port +// + +module chdr_xport_adapter_generic #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter CHDR_W = 256, + parameter USER_W = 16, + parameter TBL_SIZE = 6, + parameter [7:0] NODE_SUBTYPE = 8'd0, + parameter NODE_INST = 0 +)( + // Clock and reset + input wire clk, + input wire rst, + // Device info + input wire [15:0] device_id, + // Transport stream in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_xport_tdata, + input wire [USER_W-1:0] s_axis_xport_tuser, + input wire s_axis_xport_tlast, + input wire s_axis_xport_tvalid, + output wire s_axis_xport_tready, + // Transport stream out (AXI-Stream) + output wire [CHDR_W-1:0] m_axis_xport_tdata, + output wire [USER_W-1:0] m_axis_xport_tuser, + output wire m_axis_xport_tlast, + output wire m_axis_xport_tvalid, + input wire m_axis_xport_tready, + // RFNoC stream in (AXI-Stream) + input wire [CHDR_W-1:0] s_axis_rfnoc_tdata, + input wire s_axis_rfnoc_tlast, + input wire s_axis_rfnoc_tvalid, + output wire s_axis_rfnoc_tready, + // RFNoC stream out (AXI-Stream) + output
wire [CHDR_W-1:0] m_axis_rfnoc_tdata, + output wire m_axis_rfnoc_tlast, + output wire m_axis_rfnoc_tvalid, + input wire m_axis_rfnoc_tready, + // Control port endpoint + output wire ctrlport_req_wr, + output wire ctrlport_req_rd, + output wire [15:0] ctrlport_req_addr, + output wire [31:0] ctrlport_req_data, + input wire ctrlport_resp_ack, + input wire [31:0] ctrlport_resp_data +); + + // --------------------------------------------------- + // RFNoC Includes + // --------------------------------------------------- + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + // --------------------------------------------------- + // Reverse groups of 64-bit words to translate + // stream to "RFNoC Network Order" i.e. Big-Endian + // in groups of 8 bytes + // --------------------------------------------------- + wire [CHDR_W-1:0] i_xport_tdata; + wire [USER_W-1:0] i_xport_tuser; + wire i_xport_tlast, i_xport_tvalid, i_xport_tready; + wire [CHDR_W-1:0] o_xport_tdata; + wire [USER_W-1:0] o_xport_tuser; + wire o_xport_tlast, o_xport_tvalid, o_xport_tready; + + localparam [$clog2(CHDR_W)-1:0] SWAP_LANES = ((CHDR_W / 64) - 1) << 6; + + axis_data_swap #( + .DATA_W(CHDR_W), .USER_W(USER_W), .STAGES_EN(SWAP_LANES), .DYNAMIC(0) + ) xport_in_swap_i ( + .clk(clk), .rst(rst), + .s_axis_tdata(s_axis_xport_tdata), .s_axis_tswap('h0), + .s_axis_tuser(s_axis_xport_tuser), .s_axis_tlast(s_axis_xport_tlast), + .s_axis_tvalid(s_axis_xport_tvalid), .s_axis_tready(s_axis_xport_tready), + .m_axis_tdata (i_xport_tdata), .m_axis_tuser(i_xport_tuser), + .m_axis_tlast (i_xport_tlast), + .m_axis_tvalid(i_xport_tvalid), .m_axis_tready(i_xport_tready) + ); + + axis_data_swap #( + .DATA_W(CHDR_W), .USER_W(USER_W), .STAGES_EN(SWAP_LANES), .DYNAMIC(0) + ) xport_out_swap_i ( + .clk(clk), .rst(rst), + .s_axis_tdata(o_xport_tdata), .s_axis_tswap('h0), + .s_axis_tuser(o_xport_tuser), .s_axis_tlast(o_xport_tlast), + .s_axis_tvalid(o_xport_tvalid), 
.s_axis_tready(o_xport_tready), + .m_axis_tdata (m_axis_xport_tdata), .m_axis_tuser (m_axis_xport_tuser), + .m_axis_tlast (m_axis_xport_tlast), + .m_axis_tvalid(m_axis_xport_tvalid), .m_axis_tready(m_axis_xport_tready) + ); + + + wire [CHDR_W-1:0] x2d_tdata; // Xport => Demux + reg [USER_W-1:0] x2d_tuser; + wire [1:0] x2d_tid; + wire x2d_tlast, x2d_tvalid, x2d_tready; + wire [CHDR_W-1:0] x2x_tdata; // Xport => Xport (loopback) + wire [USER_W-1:0] x2x_tuser; + wire x2x_tlast, x2x_tvalid, x2x_tready; + wire [CHDR_W-1:0] m2x_tdata; // Mux => Xport + wire m2x_tdest; // 1: Return to src, 0: CHDR input + wire [USER_W-1:0] m2x_tuser; + wire m2x_tlast, m2x_tvalid, m2x_tready; + + // --------------------------------------------------- + // Transport => DEMUX + // --------------------------------------------------- + wire op_stb; + wire [15:0] op_src_epid; + wire [USER_W-1:0] op_data; + wire lookup_stb, lookup_done_stb, lookup_result_match; + wire [15:0] lookup_epid; + wire [USER_W-1:0] lookup_result_value; + + chdr_mgmt_pkt_handler #( + .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .USER_W(USER_W), .MGMT_ONLY(0) + ) mgmt_ep_i ( + .clk(clk), .rst(rst), + .node_info(chdr_mgmt_build_node_info({10'h0, NODE_SUBTYPE}, NODE_INST, NODE_TYPE_TRANSPORT, device_id)), + .s_axis_chdr_tdata(i_xport_tdata), .s_axis_chdr_tlast(i_xport_tlast), + .s_axis_chdr_tvalid(i_xport_tvalid), .s_axis_chdr_tready(i_xport_tready), + .s_axis_chdr_tuser(i_xport_tuser), + .m_axis_chdr_tdata(x2d_tdata), .m_axis_chdr_tlast(x2d_tlast), + .m_axis_chdr_tdest(/* unused */), .m_axis_chdr_tid(x2d_tid), + .m_axis_chdr_tvalid(x2d_tvalid), .m_axis_chdr_tready(x2d_tready), + .ctrlport_req_wr(ctrlport_req_wr), .ctrlport_req_rd(ctrlport_req_rd), + .ctrlport_req_addr(ctrlport_req_addr), .ctrlport_req_data(ctrlport_req_data), + .ctrlport_resp_ack(ctrlport_resp_ack), .ctrlport_resp_data(ctrlport_resp_data), + .op_stb(op_stb), .op_dst_epid(/* unused */), .op_src_epid(op_src_epid), .op_data(op_data) + ); + + kv_map #( + 
.KEY_WIDTH(16), .VAL_WIDTH(USER_W), .SIZE(TBL_SIZE) + ) kv_map_i ( + .clk(clk), .reset(rst), + .insert_stb(op_stb), .insert_key(op_src_epid), .insert_val(op_data), + .insert_busy(/* Time between op_stb > Insertion time */), + .find_key_stb(lookup_stb), .find_key(lookup_epid), + .find_res_stb(lookup_done_stb), + .find_res_match(lookup_result_match), .find_res_val(lookup_result_value), + .count(/* unused */) + ); + + reg i_xport_hdr = 1'b1; + always @(posedge clk) begin + if (rst) + i_xport_hdr <= 1'b1; + else if (i_xport_tvalid && i_xport_tready) + i_xport_hdr <= i_xport_tlast; + end + + // chdr_mgmt_pkt_handler does not buffer packets and has at least one cycle of delay + // TODO: The tuser caching logic could be more robust + always @(posedge clk) begin + if (i_xport_tvalid && i_xport_tready && i_xport_hdr) + x2d_tuser <= i_xport_tuser; + end + + // --------------------------------------------------- + // MUX and DEMUX for return path + // --------------------------------------------------- + + wire [USER_W-1:0] dummy_tuser; + axis_switch #( + .DATA_W(CHDR_W+USER_W), .DEST_W(1), .IN_PORTS(1), .OUT_PORTS(2), .PIPELINE(0) + ) rtn_demux_i ( + .clk(clk), .reset(rst), + .s_axis_tdata({x2d_tuser, x2d_tdata}), .s_axis_alloc(1'b0), + .s_axis_tdest(x2d_tid == CHDR_MGMT_RETURN_TO_SRC ? 
2'b01 : 2'b00), + .s_axis_tlast(x2d_tlast), .s_axis_tvalid(x2d_tvalid), .s_axis_tready(x2d_tready), + .m_axis_tdata({x2x_tuser, x2x_tdata, dummy_tuser, m_axis_rfnoc_tdata}), + .m_axis_tdest(/* unused */), + .m_axis_tlast({x2x_tlast, m_axis_rfnoc_tlast}), + .m_axis_tvalid({x2x_tvalid, m_axis_rfnoc_tvalid}), + .m_axis_tready({x2x_tready, m_axis_rfnoc_tready}) + ); + + axi_mux #( + .WIDTH(CHDR_W+USER_W+1), .SIZE(2), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(0) + ) rtn_mux_i ( + .clk(clk), .reset(rst), .clear(1'b0), + .i_tdata({1'b1, x2x_tuser, x2x_tdata, 1'b0, {USER_W{1'b0}}, s_axis_rfnoc_tdata}), + .i_tlast({x2x_tlast, s_axis_rfnoc_tlast}), + .i_tvalid({x2x_tvalid, s_axis_rfnoc_tvalid}), .i_tready({x2x_tready, s_axis_rfnoc_tready}), + .o_tdata({m2x_tdest, m2x_tuser, m2x_tdata}), .o_tlast(m2x_tlast), + .o_tvalid(m2x_tvalid), .o_tready(m2x_tready) + ); + + // --------------------------------------------------- + // MUX => Transport + // --------------------------------------------------- + + // In this section we must determine what value to put in tuser. If tdest is + // 1 then tuser is passed through unchanged. If tdest is 0 then the tuser + // value is looked up in the KV map using the EPID in the packet header. + // + // To do this we split the data (tdata, tlast) and the routing information + // (tdest, tuser, and the EPID) into two FIFOs. This allows us to perform a + // routing lookup and decide what to do while we continue to buffer data. + // + // With small packets, multiple routing lookups might be enqueued in the + // lookup_fifo, but we can only do one lookup at a time. Output logic + // controls release of packets from the data FIFO to ensure we only output + // one packet per lookup after the lookup is complete. 
+ + wire data_fifo_i_tready; + wire [CHDR_W-1:0] data_fifo_o_tdata; + wire data_fifo_o_tlast; + wire data_fifo_o_tvalid; + wire data_fifo_o_tready; + wire lookup_fifo_i_tready; + wire lookup_fifo_tdest; + wire [USER_W-1:0] lookup_fifo_tuser; + wire [ 15:0] lookup_fifo_tepid; + wire lookup_fifo_o_tvalid; + wire lookup_fifo_o_tready; + + wire non_lookup_done_stb; + reg data_fifo_o_hdr = 1'b1; + reg pass_packet; + reg [USER_W-1:0] result_tuser; + reg result_tuser_valid; + reg [USER_W-1:0] reg_o_tuser; + + + // Track when the next m2x word is the start of a new packet + reg m2x_hdr = 1'b1; + always @(posedge clk) begin + if (rst) + m2x_hdr <= 1'b1; + else if (m2x_tvalid && m2x_tready) + m2x_hdr <= m2x_tlast; + end + + // We can only accept data from the mux when both the data_fifo and + // lookup_fifo are ready. + assign m2x_tready = data_fifo_i_tready && lookup_fifo_i_tready; + + // The data_fifo only takes the packet data (tdata, tlast). We use an + // axi_fifo_short module for the data_fifo because it can tolerate tvalid + // going low before a transfer completes. + axi_fifo_short #( + .WIDTH (1+CHDR_W) + ) data_fifo ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({m2x_tlast, m2x_tdata}), + .i_tvalid (m2x_tvalid && m2x_tready), + .i_tready (data_fifo_i_tready), + .o_tdata ({data_fifo_o_tlast, data_fifo_o_tdata}), + .o_tvalid (data_fifo_o_tvalid), + .o_tready (data_fifo_o_tready), + .space (), + .occupied () + ); + + // The lookup FIFO only takes the header routing info (tdest, tuser, epid). + // We use axi_fifo_short since it can tolerate tvalid going low before a + // transfer completes.
+ axi_fifo_short #( + .WIDTH (1+USER_W+16) + ) lookup_fifo ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({m2x_tdest, m2x_tuser, chdr_get_dst_epid(m2x_tdata[63:0])}), + .i_tvalid (m2x_tvalid && m2x_tready && m2x_hdr), + .i_tready (lookup_fifo_i_tready), + .o_tdata ({lookup_fifo_tdest, lookup_fifo_tuser, lookup_fifo_tepid}), + .o_tvalid (lookup_fifo_o_tvalid), + .o_tready (lookup_fifo_o_tready), + .space (), + .occupied () + ); + + // Keep track of when we are busy doing a lookup in the KV map. + reg lookup_busy = 1'b0; + always @(posedge clk) begin + if (rst) + lookup_busy <= 1'b0; + else begin + if (lookup_stb) + lookup_busy <= 1'b1; + else if (lookup_done_stb) + lookup_busy <= 1'b0; + end + end + + // Determine if we can use the output of the lookup_fifo to do a KV map + // lookup. We only perform a KV map lookup if tdest is 0 and we can only do + // so if the KV map is free and the holding register for the tuser value is + // available. + assign lookup_epid = lookup_fifo_tepid; + assign lookup_stb = lookup_fifo_o_tvalid && !lookup_busy && + !lookup_fifo_tdest && !result_tuser_valid; + + // Determine if we can use the output of the lookup FIFO directly (no lookup + // is needed). We can only use it if we're not already doing a KV lookup and + // if the holding register for the tuser value is available. + assign non_lookup_done_stb = lookup_fifo_o_tvalid && !lookup_busy && + lookup_fifo_tdest && !result_tuser_valid; + + // Pop the routing info off of the lookup_fifo if we've started its lookup + assign lookup_fifo_o_tready = lookup_stb || non_lookup_done_stb; + + // Track when the next data_fifo_o word is the start of a new packet + always @(posedge clk) begin + if (rst) + data_fifo_o_hdr <= 1'b1; + else if (data_fifo_o_tvalid && data_fifo_o_tready && pass_packet) + data_fifo_o_hdr <= data_fifo_o_tlast; + end + + // Store the lookup result in a holding register. This can come from the KV + // map or the incoming tuser. 
+ always @(posedge clk) begin + if (rst) begin + result_tuser <= {USER_W{1'bX}}; // Don't care + result_tuser_valid <= 1'b0; + end else begin + // The tuser holding register becomes available as soon as we start + // transmitting the corresponding packet. + if (data_fifo_o_tvalid && data_fifo_o_tready && data_fifo_o_hdr && pass_packet) begin + result_tuser_valid <= 1'b0; + end + + // Load the result of the lookup + if (lookup_done_stb) begin + result_tuser <= lookup_result_match ? lookup_result_value : {USER_W{1'b0}}; + result_tuser_valid <= 1'b1; + end else if (non_lookup_done_stb) begin + result_tuser <= lookup_fifo_tuser; + result_tuser_valid <= 1'b1; + end + end + end + + // Control when the packet from the data_fifo can be passed through. Put the + // tuser value into a register for the duration of the packet. + always @(posedge clk) begin + if (rst) begin + pass_packet <= 1'b0; + reg_o_tuser <= {USER_W{1'bX}}; // Don't care + end else begin + // We're done passing through a packet when tlast goes out + if (data_fifo_o_tvalid && data_fifo_o_tready && data_fifo_o_tlast && pass_packet) begin + pass_packet <= 1'b0; + end + + // We can pass the next packet through when we're at the start of a + // packet and we have the tuser value waiting in the holding register. + if (data_fifo_o_hdr && result_tuser_valid && !pass_packet) begin + reg_o_tuser <= result_tuser; + pass_packet <= 1'b1; + end + end + end + + assign o_xport_tdata = data_fifo_o_tdata; + assign o_xport_tuser = reg_o_tuser; + assign o_xport_tlast = data_fifo_o_tlast; + assign o_xport_tvalid = data_fifo_o_tvalid & pass_packet; + assign data_fifo_o_tready = o_xport_tready & pass_packet; + +endmodule // chdr_xport_adapter_generic
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/xport/eth_interface.v b/fpga/usrp3/lib/rfnoc/xport/eth_interface.v new file mode 100644 index 000000000..21e8b809d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/xport/eth_interface.v @@ -0,0 +1,252 @@ +// +// Copyright 2019 Ettus Research, a National Instruments Brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Adapts from internal VITA to ethernet packets. Also handles CPU and ethernet crossover interfaces. + +module eth_interface #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter MTU = 10, + parameter NODE_INST = 0, + parameter RT_TBL_SIZE = 6, + parameter REG_AWIDTH = 14, + parameter BASE = 0 +) ( + input clk, + input reset, + input [15:0] device_id, + + // Register port: Write port (domain: clk) + input reg_wr_req, + input [REG_AWIDTH-1:0] reg_wr_addr, + input [31:0] reg_wr_data, + + // Register port: Read port (domain: clk) + input reg_rd_req, + input [REG_AWIDTH-1:0] reg_rd_addr, + output reg reg_rd_resp, + output reg [31:0] reg_rd_data, + + // Status ports (domain: clk) + output [47:0] my_mac, + output [31:0] my_ip, + output [15:0] my_udp_port, + + // Ethernet ports + output [63:0] eth_tx_tdata, + output [3:0] eth_tx_tuser, + output eth_tx_tlast, + output eth_tx_tvalid, + input eth_tx_tready, + input [63:0] eth_rx_tdata, + input [3:0] eth_rx_tuser, + input eth_rx_tlast, + input eth_rx_tvalid, + output eth_rx_tready, + + // Vita router interface + output [63:0] e2v_tdata, + output e2v_tlast, + output e2v_tvalid, + input e2v_tready, + input [63:0] v2e_tdata, + input v2e_tlast, + input v2e_tvalid, + output v2e_tready, + + // CPU + output [63:0] e2c_tdata, + output [3:0] e2c_tuser, + output e2c_tlast, + output e2c_tvalid, + input e2c_tready, + input [63:0] c2e_tdata, + input [3:0] c2e_tuser, + input c2e_tlast, + input c2e_tvalid, + output c2e_tready +); + + localparam [47:0] DEFAULT_MAC_ADDR = {8'h00, 8'h80, 8'h2f, 8'h16, 8'hc5, 8'h2f}; + localparam [31:0] DEFAULT_IP_ADDR = {8'd192, 
8'd168, 8'd10, 8'd2}; + localparam [15:0] DEFAULT_UDP_PORT = 16'd49153; + + //--------------------------------------------------------- + // Registers + //--------------------------------------------------------- + + // Allocate one full page for MAC + localparam [REG_AWIDTH-1:0] REG_MAC_LSB = BASE + 'h0000; + localparam [REG_AWIDTH-1:0] REG_MAC_MSB = BASE + 'h0004; + + // Source IP address + localparam [REG_AWIDTH-1:0] REG_IP = BASE + 'h1000; + // Source UDP Port + localparam [REG_AWIDTH-1:0] REG_UDP = BASE + 'h1004; + + // Registers for Internal/Bridge Network Mode in CPU + localparam [REG_AWIDTH-1:0] REG_BRIDGE_MAC_LSB = BASE + 'h1010; + localparam [REG_AWIDTH-1:0] REG_BRIDGE_MAC_MSB = BASE + 'h1014; + localparam [REG_AWIDTH-1:0] REG_BRIDGE_IP = BASE + 'h1018; + localparam [REG_AWIDTH-1:0] REG_BRIDGE_UDP = BASE + 'h101c; + localparam [REG_AWIDTH-1:0] REG_BRIDGE_ENABLE = BASE + 'h1020; + + // MAC address for the dispatcher module. + // This value is used to determine if the packet is meant + // for this device and should be consumed + // IP address for the dispatcher module. + // This value is used to determine if the packet is addressed + // to this device + // This module supports two destination ports + reg [47:0] mac_reg; + reg [31:0] ip_reg; + reg [15:0] udp_port; + reg [47:0] bridge_mac_reg; + reg [31:0] bridge_ip_reg; + reg [15:0] bridge_udp_port; + reg bridge_en; + + assign my_mac = bridge_en ? bridge_mac_reg : mac_reg; + assign my_ip = bridge_en ? bridge_ip_reg : ip_reg; + assign my_udp_port = bridge_en ?
bridge_udp_port : udp_port; + + always @(posedge clk) begin + if (reset) begin + mac_reg <= DEFAULT_MAC_ADDR; + ip_reg <= DEFAULT_IP_ADDR; + udp_port <= DEFAULT_UDP_PORT; + bridge_en <= 1'b0; + bridge_mac_reg <= DEFAULT_MAC_ADDR; + bridge_ip_reg <= DEFAULT_IP_ADDR; + bridge_udp_port <= DEFAULT_UDP_PORT; + end + else begin + if (reg_wr_req) + case (reg_wr_addr) + + REG_MAC_LSB: + mac_reg[31:0] <= reg_wr_data; + + REG_MAC_MSB: + mac_reg[47:32] <= reg_wr_data[15:0]; + + REG_IP: + ip_reg <= reg_wr_data; + + REG_UDP: + udp_port <= reg_wr_data[15:0]; + + REG_BRIDGE_MAC_LSB: + bridge_mac_reg[31:0] <= reg_wr_data; + + REG_BRIDGE_MAC_MSB: + bridge_mac_reg[47:32] <= reg_wr_data[15:0]; + + REG_BRIDGE_IP: + bridge_ip_reg <= reg_wr_data; + + REG_BRIDGE_UDP: + bridge_udp_port <= reg_wr_data[15:0]; + + REG_BRIDGE_ENABLE: + bridge_en <= reg_wr_data[0]; + endcase + end + end + + always @ (posedge clk) begin + // No reset handling required for readback + if (reg_rd_req) begin + // Assert read response one cycle after read request + reg_rd_resp <= 1'b1; + case (reg_rd_addr) + REG_MAC_LSB: + reg_rd_data <= mac_reg[31:0]; + + REG_MAC_MSB: + reg_rd_data <= {16'b0,mac_reg[47:32]}; + + REG_IP: + reg_rd_data <= ip_reg; + + REG_UDP: + reg_rd_data <= {16'b0, udp_port}; + + REG_BRIDGE_MAC_LSB: + reg_rd_data <= bridge_mac_reg[31:0]; + + REG_BRIDGE_MAC_MSB: + reg_rd_data <= {16'b0,bridge_mac_reg[47:32]}; + + REG_BRIDGE_IP: + reg_rd_data <= bridge_ip_reg; + + REG_BRIDGE_UDP: + reg_rd_data <= {16'b0, bridge_udp_port}; + + REG_BRIDGE_ENABLE: + reg_rd_data <= {31'b0,bridge_en}; + + default: + reg_rd_resp <= 1'b0; + endcase + end + // Deassert read response after one clock cycle + if (reg_rd_resp) begin + reg_rd_resp <= 1'b0; + end + end + + // In AXI Stream, tkeep is the byte qualifier that indicates + // whether the content of the associated byte + // of TDATA is processed as part of the data stream. 
+ // tuser as used in eth_switch is the number of valid bytes + + eth_ipv4_chdr64_adapter #( + .PROTOVER (PROTOVER), + .MTU (MTU), + .CPU_FIFO_SIZE (MTU), + .RT_TBL_SIZE (RT_TBL_SIZE), + .NODE_INST (NODE_INST), + .DROP_UNKNOWN_MAC(0), + .IS_CPU_ARM (1) + ) eth_adapter_i ( + .clk (clk ), + .rst (reset ), + .device_id (device_id ), + .s_mac_tdata (eth_rx_tdata ), + .s_mac_tuser (eth_rx_tuser ), + .s_mac_tlast (eth_rx_tlast ), + .s_mac_tvalid (eth_rx_tvalid), + .s_mac_tready (eth_rx_tready), + .m_mac_tdata (eth_tx_tdata ), + .m_mac_tuser (eth_tx_tuser ), + .m_mac_tlast (eth_tx_tlast ), + .m_mac_tvalid (eth_tx_tvalid), + .m_mac_tready (eth_tx_tready), + .s_chdr_tdata (v2e_tdata ), + .s_chdr_tlast (v2e_tlast ), + .s_chdr_tvalid (v2e_tvalid ), + .s_chdr_tready (v2e_tready ), + .m_chdr_tdata (e2v_tdata ), + .m_chdr_tlast (e2v_tlast ), + .m_chdr_tvalid (e2v_tvalid ), + .m_chdr_tready (e2v_tready ), + .s_cpu_tdata (c2e_tdata ), + .s_cpu_tuser (c2e_tuser ), + .s_cpu_tlast (c2e_tlast ), + .s_cpu_tvalid (c2e_tvalid ), + .s_cpu_tready (c2e_tready ), + .m_cpu_tdata (e2c_tdata ), + .m_cpu_tuser (e2c_tuser ), + .m_cpu_tlast (e2c_tlast ), + .m_cpu_tvalid (e2c_tvalid ), + .m_cpu_tready (e2c_tready ), + .my_eth_addr (my_mac ), + .my_ipv4_addr (my_ip ), + .my_udp_chdr_port(my_udp_port ) + ); + + +endmodule // eth_interface diff --git a/fpga/usrp3/lib/rfnoc/xport/eth_internal.v b/fpga/usrp3/lib/rfnoc/xport/eth_internal.v new file mode 100644 index 000000000..49cf838a5 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/xport/eth_internal.v @@ -0,0 +1,433 @@ +/////////////////////////////////////////////////////////////////// +// +// Copyright 2019 Ettus Research, a National Instruments brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: eth_internal +// Description: +// Reduces clutter at top level.
//   - FPGA-internal Ethernet port
//   - ARP responder instead of other CPU connection
//
//////////////////////////////////////////////////////////////////////

`default_nettype none
// eth_internal
//
// Wires together, all in the bus_clk domain:
//   - an AXI4-Lite slave (axil_regport_master) that turns PS register
//     accesses into regport reads/writes,
//   - a small bank of local "I/O" registers (port info, MAC/PHY control,
//     LED control),
//   - an eth_interface instance whose "eth" side is connected to the host
//     DMA streams (h2e/e2h) through arm_deframer / axi64_to_xge64,
//   - an arp_responder hooked to the eth_interface CPU port (e2c/c2e).
//
// Parameters:
//   - DWIDTH:         Width of the AXI4-Lite / regport data bus
//   - AWIDTH:         Width of the AXI4-Lite / regport address bus
//   - PORTNUM:        Port number reported in port_info[7:0]
//   - RFNOC_PROTOVER: Passed to eth_interface as PROTOVER
module eth_internal #(
  parameter        DWIDTH         = 32,
  parameter        AWIDTH         = 14,
  parameter [7:0]  PORTNUM        = 0,
  parameter [15:0] RFNOC_PROTOVER = {8'd1, 8'd0}
)(
  // Resets
  input  wire        bus_rst,

  // Clocks
  input  wire        bus_clk,

  //Axi-lite
  input  wire                s_axi_aclk,
  input  wire                s_axi_aresetn,
  input  wire [AWIDTH-1:0]   s_axi_awaddr,
  input  wire                s_axi_awvalid,
  output wire                s_axi_awready,

  input  wire [DWIDTH-1:0]   s_axi_wdata,
  input  wire [DWIDTH/8-1:0] s_axi_wstrb,
  input  wire                s_axi_wvalid,
  output wire                s_axi_wready,

  output wire [1:0]          s_axi_bresp,
  output wire                s_axi_bvalid,
  input  wire                s_axi_bready,

  input  wire [AWIDTH-1:0]   s_axi_araddr,
  input  wire                s_axi_arvalid,
  output wire                s_axi_arready,

  output wire [DWIDTH-1:0]   s_axi_rdata,
  output wire [1:0]          s_axi_rresp,
  output wire                s_axi_rvalid,
  input  wire                s_axi_rready,

  // Host-Ethernet DMA interface
  output wire [63:0] e2h_tdata,
  output wire [7:0]  e2h_tkeep,
  output wire        e2h_tlast,
  output wire        e2h_tvalid,
  input  wire        e2h_tready,

  input  wire [63:0] h2e_tdata,
  input  wire [7:0]  h2e_tkeep,
  input  wire        h2e_tlast,
  input  wire        h2e_tvalid,
  output wire        h2e_tready,

  // RFNoC interface
  output wire [63:0] e2v_tdata,
  output wire        e2v_tlast,
  output wire        e2v_tvalid,
  input  wire        e2v_tready,

  input  wire [63:0] v2e_tdata,
  input  wire        v2e_tlast,
  input  wire        v2e_tvalid,
  output wire        v2e_tready,

  // MISC
  output wire [31:0] port_info,
  input  wire [15:0] device_id,

  output wire        link_up,
  output reg         activity

);

  localparam REG_BASE_ETH_IO     = 14'h0;
  localparam REG_BASE_ETH_SWITCH = 14'h1000;

  //-----------------------------------------------------------------
  // tuser <-> tkeep conversion helpers
  //-----------------------------------------------------------------
  // In AXI Stream, tkeep is the byte qualifier that indicates
  // whether the content of the associated byte
  // of TDATA is processed as part of the data stream.
  // tuser as used in eth_interface is the number of valid bytes

  // Number-of-valid-bytes (tuser) to byte-enable mask (tkeep).
  // Every word except the last is full; on the last word a count of 0
  // means "all 8 bytes". Counts above 6 (i.e. 7 and any value with
  // bit 3 set) map to seven valid bytes.
  function [7:0] tuser_to_tkeep(
    input       last,
    input [3:0] user
  );
    begin
      tuser_to_tkeep = ~last ? 8'b1111_1111
                     : (user == 4'd0) ? 8'b1111_1111
                     : (user == 4'd1) ? 8'b0000_0001
                     : (user == 4'd2) ? 8'b0000_0011
                     : (user == 4'd3) ? 8'b0000_0111
                     : (user == 4'd4) ? 8'b0000_1111
                     : (user == 4'd5) ? 8'b0001_1111
                     : (user == 4'd6) ? 8'b0011_1111
                     : 8'b0111_1111;
    end
  endfunction

  // Byte-enable mask (tkeep) to number-of-valid-bytes (tuser).
  // Only contiguous, LSB-aligned masks are recognized; anything else
  // (and every non-last word) yields 0, which means "all 8 bytes".
  function [3:0] tkeep_to_tuser(
    input       last,
    input [7:0] keep
  );
    begin
      tkeep_to_tuser = ~last ? 4'd0
                     : (keep == 8'b1111_1111) ? 4'd0
                     : (keep == 8'b0111_1111) ? 4'd7
                     : (keep == 8'b0011_1111) ? 4'd6
                     : (keep == 8'b0001_1111) ? 4'd5
                     : (keep == 8'b0000_1111) ? 4'd4
                     : (keep == 8'b0000_0111) ? 4'd3
                     : (keep == 8'b0000_0011) ? 4'd2
                     : (keep == 8'b0000_0001) ? 4'd1
                     : 4'd0;
    end
  endfunction

  //-----------------------------------------------------------------
  // AXI4-Lite to RegPort (PS to PL Register Access)
  //-----------------------------------------------------------------
  wire              reg_wr_req;
  wire [AWIDTH-1:0] reg_wr_addr;
  wire [DWIDTH-1:0] reg_wr_data;
  wire              reg_rd_req;
  wire [AWIDTH-1:0] reg_rd_addr;
  wire              reg_rd_resp, reg_rd_resp_eth_if;
  reg               reg_rd_resp_io = 1'b0;
  wire [DWIDTH-1:0] reg_rd_data, reg_rd_data_eth_if;
  reg  [DWIDTH-1:0] reg_rd_data_io = 'd0;

  axil_regport_master #(
    .DWIDTH   (DWIDTH),   // Width of the AXI4-Lite data bus (must be 32 or 64)
    .AWIDTH   (AWIDTH),   // Width of the address bus
    .WRBASE   (0),        // Write address base
    .RDBASE   (0),        // Read address base
    .TIMEOUT  (10)        // log2(timeout). Read will timeout after (2^TIMEOUT - 1) cycles
  ) eth_dma_reg_mst_i (
    // Clock and reset
    .s_axi_aclk    (s_axi_aclk),
    .s_axi_aresetn (s_axi_aresetn),
    // AXI4-Lite: Write address port (domain: s_axi_aclk)
    .s_axi_awaddr  (s_axi_awaddr),
    .s_axi_awvalid (s_axi_awvalid),
    .s_axi_awready (s_axi_awready),
    // AXI4-Lite: Write data port (domain: s_axi_aclk)
    .s_axi_wdata   (s_axi_wdata),
    .s_axi_wstrb   (s_axi_wstrb),
    .s_axi_wvalid  (s_axi_wvalid),
    .s_axi_wready  (s_axi_wready),
    // AXI4-Lite: Write response port (domain: s_axi_aclk)
    .s_axi_bresp   (s_axi_bresp),
    .s_axi_bvalid  (s_axi_bvalid),
    .s_axi_bready  (s_axi_bready),
    // AXI4-Lite: Read address port (domain: s_axi_aclk)
    .s_axi_araddr  (s_axi_araddr),
    .s_axi_arvalid (s_axi_arvalid),
    .s_axi_arready (s_axi_arready),
    // AXI4-Lite: Read data port (domain: s_axi_aclk)
    .s_axi_rdata   (s_axi_rdata),
    .s_axi_rresp   (s_axi_rresp),
    .s_axi_rvalid  (s_axi_rvalid),
    .s_axi_rready  (s_axi_rready),
    // Register port: Write port (domain: reg_clk)
    .reg_clk       (bus_clk),
    .reg_wr_req    (reg_wr_req),
    .reg_wr_addr   (reg_wr_addr),
    .reg_wr_data   (reg_wr_data),
    // Register port: Read port (domain: reg_clk)
    .reg_rd_req    (reg_rd_req),
    .reg_rd_addr   (reg_rd_addr),
    .reg_rd_resp   (reg_rd_resp),
    .reg_rd_data   (reg_rd_data)
  );

  // Regport Mux for response: merges the read responses of the local
  // I/O registers and of eth_interface into the single master response.
  regport_resp_mux #(
    .WIDTH      (DWIDTH),
    .NUM_SLAVES (2)
  ) reg_resp_mux_i (
    .clk(bus_clk), .reset(bus_rst),
    .sla_rd_resp({reg_rd_resp_eth_if, reg_rd_resp_io}),
    .sla_rd_data({reg_rd_data_eth_if, reg_rd_data_io}),
    .mst_rd_resp(reg_rd_resp),
    .mst_rd_data(reg_rd_data)
  );

  //-----------------------------------------------------------------
  // Stream declarations
  //-----------------------------------------------------------------
  // ARP responder
  wire [63:0] e2c_tdata;
  wire [7:0]  e2c_tkeep;
  wire        e2c_tlast;
  wire        e2c_tvalid;
  wire        e2c_tready;

  wire [63:0] c2e_tdata;
  wire [7:0]  c2e_tkeep;
  wire        c2e_tlast;
  wire        c2e_tvalid;
  wire        c2e_tready;

  wire [3:0]  e2c_tuser;
  wire [3:0]  c2e_tuser;

  // ARM Host-to-Ethernet
  wire [3:0]  e2h_tuser;
  wire [3:0]  h2e_tuser;

  // Host Ethernet-to-CHDR
  wire [63:0] h2e_chdr_tdata;
  wire [3:0]  h2e_chdr_tuser;
  wire        h2e_chdr_tlast;
  wire        h2e_chdr_tvalid;
  wire        h2e_chdr_tready;
  wire [63:0] e2h_chdr_tdata;
  wire [3:0]  e2h_chdr_tuser;
  wire        e2h_chdr_tlast;
  wire        e2h_chdr_tvalid;
  wire        e2h_chdr_tready;

  // Converting tuser to tkeep for ingress packets
  assign e2c_tkeep = tuser_to_tkeep(e2c_tlast, e2c_tuser);
  assign e2h_tkeep = tuser_to_tkeep(e2h_tlast, e2h_tuser);

  // Converting tkeep to tuser for egress packets
  assign c2e_tuser = tkeep_to_tuser(c2e_tlast, c2e_tkeep);
  assign h2e_tuser = tkeep_to_tuser(h2e_tlast, h2e_tkeep);

  // FPGA-side addresses for the ARP responder
  wire [47:0] my_mac;
  wire [31:0] my_ip;
  wire [15:0] my_udp_port;

  //-----------------------------------------------------------------
  // Host DMA <-> eth_interface "eth" port
  //-----------------------------------------------------------------
  // Host-to-Ethernet direction
  arm_deframer arm_deframer_i (
    .clk(bus_clk),
    .reset(bus_rst),
    .clear(1'b0),
    .s_axis_tdata(h2e_tdata),
    .s_axis_tuser(h2e_tuser),
    .s_axis_tlast(h2e_tlast),
    .s_axis_tvalid(h2e_tvalid),
    .s_axis_tready(h2e_tready),
    .m_axis_tdata(h2e_chdr_tdata),
    .m_axis_tuser(h2e_chdr_tuser),
    .m_axis_tlast(h2e_chdr_tlast),
    .m_axis_tvalid(h2e_chdr_tvalid),
    .m_axis_tready(h2e_chdr_tready)
  );

  // Ethernet-to-host direction
  axi64_to_xge64 arm_framer (
    .clk(bus_clk),
    .reset(bus_rst),
    .clear(1'b0),
    .s_axis_tdata(e2h_chdr_tdata),
    .s_axis_tuser(e2h_chdr_tuser),
    .s_axis_tlast(e2h_chdr_tlast),
    .s_axis_tvalid(e2h_chdr_tvalid),
    .s_axis_tready(e2h_chdr_tready),
    .m_axis_tdata(e2h_tdata),
    .m_axis_tuser(e2h_tuser),
    .m_axis_tlast(e2h_tlast),
    .m_axis_tvalid(e2h_tvalid),
    .m_axis_tready(e2h_tready)
  );

  eth_interface #(
    .PROTOVER(RFNOC_PROTOVER),
    .MTU(10),
    .NODE_INST(0),
    .REG_AWIDTH (AWIDTH),
    .BASE(REG_BASE_ETH_SWITCH)
  ) eth_interface (
    .clk           (bus_clk),
    .reset         (bus_rst),
    .device_id     (device_id),
    .reg_wr_req    (reg_wr_req),
    .reg_wr_addr   (reg_wr_addr),
    .reg_wr_data   (reg_wr_data),
    .reg_rd_req    (reg_rd_req),
    .reg_rd_addr   (reg_rd_addr),
    .reg_rd_resp   (reg_rd_resp_eth_if),
    .reg_rd_data   (reg_rd_data_eth_if),
    .my_mac        (my_mac),
    .my_ip         (my_ip),
    .my_udp_port   (my_udp_port),
    .eth_tx_tdata  (e2h_chdr_tdata),
    .eth_tx_tuser  (e2h_chdr_tuser),
    .eth_tx_tlast  (e2h_chdr_tlast),
    .eth_tx_tvalid (e2h_chdr_tvalid),
    .eth_tx_tready (e2h_chdr_tready),
    .eth_rx_tdata  (h2e_chdr_tdata),
    .eth_rx_tuser  (h2e_chdr_tuser),
    .eth_rx_tlast  (h2e_chdr_tlast),
    .eth_rx_tvalid (h2e_chdr_tvalid),
    .eth_rx_tready (h2e_chdr_tready),
    .e2v_tdata     (e2v_tdata),
    .e2v_tlast     (e2v_tlast),
    .e2v_tvalid    (e2v_tvalid),
    .e2v_tready    (e2v_tready),
    .v2e_tdata     (v2e_tdata),
    .v2e_tlast     (v2e_tlast),
    .v2e_tvalid    (v2e_tvalid),
    .v2e_tready    (v2e_tready),
    .e2c_tdata     (e2c_tdata),
    .e2c_tuser     (e2c_tuser),
    .e2c_tlast     (e2c_tlast),
    .e2c_tvalid    (e2c_tvalid),
    .e2c_tready    (e2c_tready),
    .c2e_tdata     (c2e_tdata),
    .c2e_tuser     (c2e_tuser),
    .c2e_tlast     (c2e_tlast),
    .c2e_tvalid    (c2e_tvalid),
    .c2e_tready    (c2e_tready)
  );

  // Takes the place of a CPU on the eth_interface e2c/c2e port.
  arp_responder arp_responder_i (
    .aclk          (bus_clk),
    .aresetn       (~bus_rst),
    .mac_addr      (my_mac),
    .ip_addr       (my_ip),
    .s_axis_tdata  (e2c_tdata),
    .s_axis_tvalid (e2c_tvalid),
    .s_axis_tready (e2c_tready),
    .s_axis_tkeep  (e2c_tkeep),
    .s_axis_tlast  (e2c_tlast),
    .s_axis_tuser  (1'b0),
    .m_axis_tdata  (c2e_tdata),
    .m_axis_tvalid (c2e_tvalid),
    .m_axis_tready (c2e_tready),
    .m_axis_tkeep  (c2e_tkeep),
    .m_axis_tlast  (c2e_tlast),
    .m_axis_tuser  ()
  );

  //-----------------------------------------------------------------
  // "I/O" Registers
  //-----------------------------------------------------------------
  localparam [7:0] COMPAT_NUM   = 8'd2;
  localparam [7:0] MGT_PROTOCOL = 8'd4; // 10 GbE Internal (8'd2 is 10 GbE External)

  // Common registers
  localparam REG_PORT_INFO       = REG_BASE_ETH_IO + 'h0;
  localparam REG_MAC_CTRL_STATUS = REG_BASE_ETH_IO + 'h4;
  localparam REG_PHY_CTRL_STATUS = REG_BASE_ETH_IO + 'h8;
  localparam REG_MAC_LED_CTL     = REG_BASE_ETH_IO + 'hC;

  // Protocol specific constants
  localparam [1:0]  MAC_LED_CTL_RST_VAL = 2'h0;

  localparam [31:0] MAC_CTRL_RST_VAL = {31'h0, 1'b1}; // tx_enable on reset
  localparam [31:0] PHY_CTRL_RST_VAL = 32'h0;

  // Writable registers
  // mac_ctrl_reg and phy_ctrl_reg are stored but not read anywhere in
  // this module; only mac_led_ctl affects logic (activity override).
  reg [31:0] mac_ctrl_reg = MAC_CTRL_RST_VAL;
  reg [31:0] phy_ctrl_reg = PHY_CTRL_RST_VAL;
  reg [1:0]  mac_led_ctl  = MAC_LED_CTL_RST_VAL;

  always @(posedge bus_clk) begin
    if (bus_rst) begin
      mac_ctrl_reg <= MAC_CTRL_RST_VAL;
      phy_ctrl_reg <= PHY_CTRL_RST_VAL;
      mac_led_ctl  <= MAC_LED_CTL_RST_VAL;
    end else if (reg_wr_req) begin
      case(reg_wr_addr)
        REG_MAC_CTRL_STATUS:
          mac_ctrl_reg <= reg_wr_data;
        REG_PHY_CTRL_STATUS:
          phy_ctrl_reg <= reg_wr_data;
        REG_MAC_LED_CTL:
          mac_led_ctl <= reg_wr_data[1:0];
      endcase
    end
  end

  // Readable registers
  wire [31:0] mac_status, phy_status;

  assign port_info = {COMPAT_NUM, 6'h0, activity, link_up, MGT_PROTOCOL, PORTNUM};

  always @(posedge bus_clk) begin
    // No reset handling needed for readback
    if (reg_rd_req) begin
      reg_rd_resp_io <= 1'b1;
      case(reg_rd_addr)
        REG_PORT_INFO:
          reg_rd_data_io <= port_info;
        REG_MAC_CTRL_STATUS:
          reg_rd_data_io <= mac_status;
        REG_PHY_CTRL_STATUS:
          reg_rd_data_io <= phy_status;
        REG_MAC_LED_CTL:
          reg_rd_data_io <= {30'd0, mac_led_ctl};
        default:
          // Address not owned by this register bank: no response here.
          reg_rd_resp_io <= 1'b0;
      endcase
    end
    // This is a separate "if", not an "else if". As the last assignment
    // it wins, which makes the response a single-cycle pulse; a request
    // arriving in the very cycle the response is high gets no response.
    // NOTE(review): presumably the regport master never issues such
    // back-to-back reads -- confirm against axil_regport_master.
    if (reg_rd_resp_io) begin
      reg_rd_resp_io <= 1'b0;
    end
  end

  // There is no real MAC/PHY behind this port, so both status words
  // read back as zero and the link is always reported up.
  // All 32 bits of phy_status are driven; previously bits [7:0] were
  // left without a driver.
  assign mac_status = 'd0;
  assign phy_status = 32'h0;
  assign link_up    = 1'b1;

  wire identify_enable = mac_led_ctl[0];
  wire identify_value  = mac_led_ctl[1];

  //-----------------------------------------------------------------
  // Activity detector
  //-----------------------------------------------------------------
  wire activity_int;

  // Stretches every accepted host DMA beat (either direction).
  pulse_stretch act_pulse_str_i (
    .clk(bus_clk),
    .rst(bus_rst | ~link_up),
    .pulse((h2e_tvalid & h2e_tready) | (e2h_tvalid & e2h_tready)),
    .pulse_stretched(activity_int)
  );

  // mac_led_ctl[0] overrides the activity indication with mac_led_ctl[1].
  always @ (posedge bus_clk) activity <= identify_enable ? identify_value : activity_int;

endmodule
`default_nettype wire
diff --git a/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_adapter.v b/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_adapter.v new file mode 100644 index 000000000..66c1b521e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_adapter.v @@ -0,0 +1,397 @@
//
// Copyright 2019 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: eth_ipv4_chdr64_adapter
// Description: A generic transport adapter module that can be used in
// a variety of transports. It does the following:
// - Exposes a configuration port for mgmt packets to configure the node
// - Implements a return-address map for packets with metadata other than
//   the CHDR. Additional metadata can be passed as a tuser to this module
//   which will store it in a map indexed by the SrcEPID in a management
//   packet. For all returning packets, the metadata will be looked up in
//   the map and attached as the outgoing tuser.
// - Implements a loopback path for node-info discovery
//
// Parameters:
//   - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>}
//   - MTU: Log2 of the MTU of the packet in 64-bit words
//   - CPU_FIFO_SIZE: Log2 of the FIFO depth (in 64-bit words) for the CPU egress path
//   - RT_TBL_SIZE: Log2 of the depth of the return-address routing table
//   - NODE_INST: The node type to return for a node-info discovery
//   - DROP_UNKNOWN_MAC: Drop packets not addressed to us?
//
// Signals:
//   - device_id : The ID of the device that has instantiated this module
//   - s_mac_*: The input Ethernet stream from the MAC (plus tuser for trailing bytes + err)
//   - m_mac_*: The output Ethernet stream to the MAC (plus tuser for trailing bytes + err)
//   - s_chdr_*: The input CHDR stream from the rfnoc infrastructure
//   - m_chdr_*: The output CHDR stream to the rfnoc infrastructure
//   - s_cpu_*: The input Ethernet stream from the CPU (plus tuser for trailing bytes + err)
//   - m_cpu_*: The output Ethernet stream to the CPU (plus tuser for trailing bytes + err)
//   - my_eth_addr: The Ethernet (MAC) address of this endpoint
//   - my_ipv4_addr: The IPv4 address of this endpoint
//   - my_udp_chdr_port: The UDP port allocated for CHDR traffic on this endpoint
//

`default_nettype none
module eth_ipv4_chdr64_adapter #(
  parameter [15:0] PROTOVER         = {8'd1, 8'd0},
  parameter        MTU              = 10,
  parameter        CPU_FIFO_SIZE    = MTU,
  parameter        RT_TBL_SIZE      = 6,
  parameter        NODE_INST        = 0,
  parameter [0:0]  DROP_UNKNOWN_MAC = 1,
  // When 1, axi64_to_xge64 / arm_deframer are inserted on the CPU path
  // (see the two generate blocks below); when 0 the CPU streams are
  // passed through unmodified.
  parameter [0:0]  IS_CPU_ARM       = 0
)(
  // Clocking and reset interface
  input  wire        clk,
  input  wire        rst,
  // Device info
  input  wire [15:0] device_id,
  // AXI-Stream interface to/from MAC
  input  wire [63:0] s_mac_tdata,
  input  wire [3:0]  s_mac_tuser,
  input  wire        s_mac_tlast,
  input  wire        s_mac_tvalid,
  output wire        s_mac_tready,
  output wire [63:0] m_mac_tdata,
  output wire [3:0]  m_mac_tuser,
  output wire        m_mac_tlast,
  output wire        m_mac_tvalid,
  input  wire        m_mac_tready,
  // AXI-Stream interface to/from CHDR infrastructure
  input  wire [63:0] s_chdr_tdata,
  input  wire        s_chdr_tlast,
  input  wire        s_chdr_tvalid,
  output wire        s_chdr_tready,
  output wire [63:0] m_chdr_tdata,
  output wire        m_chdr_tlast,
  output wire        m_chdr_tvalid,
  input  wire        m_chdr_tready,
  // AXI-Stream interface to/from CPU
  input  wire [63:0] s_cpu_tdata,
  input  wire [3:0]  s_cpu_tuser,
  input  wire        s_cpu_tlast,
  input  wire        s_cpu_tvalid,
  output wire        s_cpu_tready,
  output wire [63:0] m_cpu_tdata,
  output wire [3:0]  m_cpu_tuser,
  output wire        m_cpu_tlast,
  output wire        m_cpu_tvalid,
  input  wire        m_cpu_tready,
  // Device addresses
  input  wire [47:0] my_eth_addr,
  input  wire [31:0] my_ipv4_addr,
  input  wire [15:0] my_udp_chdr_port
);

  `include "../core/rfnoc_chdr_utils.vh"
  `include "../core/rfnoc_chdr_internal_utils.vh"
  `include "rfnoc_xport_types.vh"

  //-----------------------------------------------------------------------
  // Byte-swapping function
  // Ethernet fields we wrote out left-to-right, but AXI-Stream time-orders
  // its data right-to-left.
  //-----------------------------------------------------------------------
  function [63:0] bswap64(
    input [63:0] din
  );
    begin
      bswap64 = {din[0 +: 8], din[8 +: 8], din[16 +: 8], din[24 +: 8],
                 din[32+: 8], din[40+: 8], din[48 +: 8], din[56 +: 8]};
    end
  endfunction

  //---------------------------------------
  // E2X and E2C DEMUX
  //---------------------------------------
  wire [63:0] e2x_chdr_tdata;
  wire [95:0] e2x_chdr_tuser;
  wire        e2x_chdr_tlast, e2x_chdr_tvalid, e2x_chdr_tready;
  wire [63:0] e2c_chdr_tdata;
  wire [3:0]  e2c_chdr_tuser;
  wire        e2c_chdr_tlast, e2c_chdr_tvalid, e2c_chdr_tready;

  // Ethernet sink. Inspects packet and dispatches
  // to the correct port.
  eth_ipv4_chdr64_dispatch #(
    .DROP_UNKNOWN_MAC(DROP_UNKNOWN_MAC)
  ) eth_dispatch_i (
    .clk              (clk),
    .rst              (rst),
    .s_mac_tdata      (s_mac_tdata),
    .s_mac_tuser      (s_mac_tuser),
    .s_mac_tlast      (s_mac_tlast),
    .s_mac_tvalid     (s_mac_tvalid),
    .s_mac_tready     (s_mac_tready),
    .m_chdr_tdata     (e2x_chdr_tdata),
    .m_chdr_tuser     (e2x_chdr_tuser),
    .m_chdr_tlast     (e2x_chdr_tlast),
    .m_chdr_tvalid    (e2x_chdr_tvalid),
    .m_chdr_tready    (e2x_chdr_tready),
    .m_cpu_tdata      (e2c_chdr_tdata),
    .m_cpu_tuser      (e2c_chdr_tuser),
    .m_cpu_tlast      (e2c_chdr_tlast),
    .m_cpu_tvalid     (e2c_chdr_tvalid),
    .m_cpu_tready     (e2c_chdr_tready),
    .my_eth_addr      (my_eth_addr),
    .my_ipv4_addr     (my_ipv4_addr),
    .my_udp_chdr_port (my_udp_chdr_port)
  );

  //---------------------------------------
  // CHDR Transport Adapter
  //---------------------------------------

  wire [63:0] x2e_chdr_tdata;
  wire [95:0] x2e_chdr_tuser;
  wire        x2e_chdr_tlast, x2e_chdr_tvalid, x2e_chdr_tready;
  wire [63:0] e2x_fifo_tdata;
  wire        e2x_fifo_tlast, e2x_fifo_tvalid, e2x_fifo_tready;
  wire [63:0] e2c_fifo_tdata;
  wire [3:0]  e2c_fifo_tuser;
  wire        e2c_fifo_tlast, e2c_fifo_tvalid, e2c_fifo_tready;

  // The 96-bit tuser carries the return address of a packet. On the way
  // out it is unpacked below as {udp_dst[15:0], ip_dst[31:0], mac_dst[47:0]}.
  chdr_xport_adapter_generic #(
    .PROTOVER(PROTOVER), .CHDR_W(64),
    .USER_W(96), .TBL_SIZE(RT_TBL_SIZE),
    .NODE_SUBTYPE(NODE_SUBTYPE_XPORT_IPV4_CHDR64), .NODE_INST(NODE_INST)
  ) xport_adapter_gen_i (
    .clk                (clk),
    .rst                (rst),
    .device_id          (device_id),
    .s_axis_xport_tdata (e2x_chdr_tdata),
    .s_axis_xport_tuser (e2x_chdr_tuser),
    .s_axis_xport_tlast (e2x_chdr_tlast),
    .s_axis_xport_tvalid(e2x_chdr_tvalid),
    .s_axis_xport_tready(e2x_chdr_tready),
    .m_axis_xport_tdata (x2e_chdr_tdata),
    .m_axis_xport_tuser (x2e_chdr_tuser),
    .m_axis_xport_tlast (x2e_chdr_tlast),
    .m_axis_xport_tvalid(x2e_chdr_tvalid),
    .m_axis_xport_tready(x2e_chdr_tready),
    .s_axis_rfnoc_tdata (s_chdr_tdata),
    .s_axis_rfnoc_tlast (s_chdr_tlast),
    .s_axis_rfnoc_tvalid(s_chdr_tvalid),
    .s_axis_rfnoc_tready(s_chdr_tready),
    .m_axis_rfnoc_tdata (e2x_fifo_tdata),
    .m_axis_rfnoc_tlast (e2x_fifo_tlast),
    .m_axis_rfnoc_tvalid(e2x_fifo_tvalid),
    .m_axis_rfnoc_tready(e2x_fifo_tready),
    .ctrlport_req_wr    (/* unused */),
    .ctrlport_req_rd    (/* unused */),
    .ctrlport_req_addr  (/* unused */),
    .ctrlport_req_data  (/* unused */),
    .ctrlport_resp_ack  (/* unused */),
    .ctrlport_resp_data (/* unused */)
  );

  generate
    if (IS_CPU_ARM == 1'b1) begin
      //---------------------------------------
      // Ethernet framer for ARM
      //---------------------------------------

      // Strip the 6 octet ethernet padding we used internally
      // before sending to ARM.
      // Put SOF into bit[3] of tuser.
      axi64_to_xge64 arm_framer (
        .clk(clk),
        .reset(rst),
        .clear(1'b0),
        .s_axis_tdata(e2c_chdr_tdata),
        .s_axis_tuser(e2c_chdr_tuser),
        .s_axis_tlast(e2c_chdr_tlast),
        .s_axis_tvalid(e2c_chdr_tvalid),
        .s_axis_tready(e2c_chdr_tready),
        .m_axis_tdata(e2c_fifo_tdata),
        .m_axis_tuser(e2c_fifo_tuser),
        .m_axis_tlast(e2c_fifo_tlast),
        .m_axis_tvalid(e2c_fifo_tvalid),
        .m_axis_tready(e2c_fifo_tready)
      );
    end else begin
      // Straight pass-through to the CPU FIFO
      assign e2c_fifo_tdata  = e2c_chdr_tdata;
      assign e2c_fifo_tuser  = e2c_chdr_tuser;
      assign e2c_fifo_tlast  = e2c_chdr_tlast;
      assign e2c_fifo_tvalid = e2c_chdr_tvalid;
      assign e2c_chdr_tready = e2c_fifo_tready;
    end
  endgenerate

  //---------------------------------------
  // E2X and E2C Output Buffering
  //---------------------------------------

  // The CPU can be slow to respond (relative to packet wirespeed) so
  // extra buffer for packets destined there so it doesn't back up.
  axi_fifo #(
    .WIDTH(64+4+1),.SIZE(CPU_FIFO_SIZE)
  ) cpu_fifo_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata({e2c_fifo_tlast, e2c_fifo_tuser, e2c_fifo_tdata}),
    .i_tvalid(e2c_fifo_tvalid), .i_tready(e2c_fifo_tready),
    .o_tdata({m_cpu_tlast, m_cpu_tuser, m_cpu_tdata}),
    .o_tvalid(m_cpu_tvalid), .o_tready(m_cpu_tready),
    .occupied(), .space()
  );

  // The transport should hook up to a crossbar downstream, which
  // may backpressure this module because it is in the middle of
  // transferring a packet. To ensure that upstream logic is not
  // blocked, we instantiate one packet worth of buffering here.
  axi_fifo #(
    .WIDTH(64+1),.SIZE(MTU)
  ) chdr_fifo_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata({e2x_fifo_tlast, e2x_fifo_tdata}),
    .i_tvalid(e2x_fifo_tvalid), .i_tready(e2x_fifo_tready),
    .o_tdata({m_chdr_tlast, m_chdr_tdata}),
    .o_tvalid(m_chdr_tvalid), .o_tready(m_chdr_tready),
    .occupied(), .space()
  );

  //---------------------------------------
  // Ethernet Framer for X2E
  //---------------------------------------
  // Prepends six 64-bit header words (padded Ethernet + IPv4 + UDP) to
  // every CHDR packet leaving the transport adapter. The CHDR stream is
  // held off (tready low) while the header words are being emitted.
  wire [63:0] x2e_framed_tdata;
  wire [3:0]  x2e_framed_tuser;
  wire        x2e_framed_tlast, x2e_framed_tvalid, x2e_framed_tready;

  // The header states are numbered consecutively so that the state
  // machine can step through them with a simple increment.
  localparam [2:0] ST_IDLE           = 3'd0;
  localparam [2:0] ST_ETH_L0         = 3'd1;
  localparam [2:0] ST_ETH_L1         = 3'd2;
  localparam [2:0] ST_ETH_L2_IPV4_L0 = 3'd3;
  localparam [2:0] ST_IPV4_L1        = 3'd4;
  localparam [2:0] ST_IPV4_L2        = 3'd5;
  localparam [2:0] ST_IPV4_UDP_HDR   = 3'd6;
  localparam [2:0] ST_CHDR_PAYLOAD   = 3'd7;

  reg [2:0]  frame_state = ST_IDLE;
  reg [15:0] chdr_len    = 16'd0;
  reg [63:0] frame_tdata;

  always @(posedge clk) begin
    if(rst) begin
      frame_state <= ST_IDLE;
      chdr_len    <= 16'd0;
    end else begin
      case(frame_state)
        ST_IDLE: begin
          // The first CHDR word is only inspected here, not consumed:
          // latch the packet length for the IP/UDP length fields.
          if (x2e_chdr_tvalid) begin
            frame_state <= ST_ETH_L0;
            chdr_len    <= chdr_get_length(x2e_chdr_tdata);
          end
        end
        ST_CHDR_PAYLOAD: begin
          // Return to idle once the last payload word has been accepted
          if (x2e_chdr_tvalid & x2e_framed_tready & x2e_chdr_tlast)
            frame_state <= ST_IDLE;
        end
        default: begin
          // Header words: advance whenever the downstream mux accepts one
          if(x2e_framed_tready)
            frame_state <= frame_state + 3'd1;
        end
      endcase
    end
  end

  assign x2e_chdr_tready   = (frame_state == ST_CHDR_PAYLOAD) ? x2e_framed_tready : 1'b0;
  assign x2e_framed_tvalid = (frame_state == ST_CHDR_PAYLOAD) ? x2e_chdr_tvalid :
                             (frame_state == ST_IDLE)         ? 1'b0 : 1'b1;
  assign x2e_framed_tlast  = (frame_state == ST_CHDR_PAYLOAD) ? x2e_chdr_tlast : 1'b0;
  // On the last word, tuser carries the CHDR length modulo 8
  assign x2e_framed_tuser  = ((frame_state == ST_CHDR_PAYLOAD) & x2e_chdr_tlast) ?
                             {1'b0, chdr_len[2:0]} : 4'b0000;
  assign x2e_framed_tdata  = frame_tdata;

  wire [47:0] pad          = 48'h0;
  wire [47:0] mac_dst      = x2e_chdr_tuser[47:0];    // Extract from router lookup results
  wire [15:0] eth_type     = 16'h0800;                // IPv4
  wire [15:0] misc_ip      = { 4'd4 /* IPv4 */, 4'd5 /* IP HDR Len */, 8'h00 /* DSCP and ECN */};
  wire [15:0] ip_len       = (16'd28 + chdr_len);     // 20 for IP, 8 for UDP
  wire [15:0] ident        = 16'h0;
  wire [15:0] flag_frag    = { 3'b010 /* don't fragment */, 13'h0 };
  wire [15:0] ttl_prot     = { 8'h10 /* TTL */, 8'h11 /* UDP */ };
  wire [15:0] iphdr_checksum;
  wire [31:0] ip_dst       = x2e_chdr_tuser[79:48];   // Extract from router lookup results
  wire [15:0] udp_dst      = x2e_chdr_tuser[95:80];   // Extract from router lookup results
  wire [15:0] udp_len      = (16'd8 + chdr_len);
  wire [15:0] udp_checksum = 16'h0;

  // Checksum over the IPv4 header with the checksum field itself zeroed
  ip_hdr_checksum ip_hdr_checksum (
    .clk(clk), .in({misc_ip, ip_len, ident, flag_frag, ttl_prot, 16'd0, my_ipv4_addr, ip_dst}),
    .out(iphdr_checksum)
  );

  // Output word selection. This is purely combinational logic, so
  // blocking assignments are used.
  always @(*) begin
    case(frame_state)
      ST_ETH_L0         : frame_tdata = bswap64({pad[47:0], mac_dst[47:32]});
      ST_ETH_L1         : frame_tdata = bswap64({mac_dst[31:0], my_eth_addr[47:16]});
      ST_ETH_L2_IPV4_L0 : frame_tdata = bswap64({my_eth_addr[15:0], eth_type[15:0], misc_ip[15:0], ip_len[15:0]});
      ST_IPV4_L1        : frame_tdata = bswap64({ident[15:0], flag_frag[15:0], ttl_prot[15:0], iphdr_checksum[15:0]});
      ST_IPV4_L2        : frame_tdata = bswap64({my_ipv4_addr[31:0], ip_dst[31:0]});
      ST_IPV4_UDP_HDR   : frame_tdata = bswap64({my_udp_chdr_port[15:0], udp_dst[15:0], udp_len[15:0], udp_checksum[15:0]});
      default           : frame_tdata = x2e_chdr_tdata; // ST_IDLE and ST_CHDR_PAYLOAD
    endcase
  end

  wire [63:0] c2e_tdata;
  wire [3:0]  c2e_tuser;
  wire        c2e_tlast;
  wire        c2e_tvalid;
  wire        c2e_tready;

  generate
    if (IS_CPU_ARM == 1'b1) begin
      //---------------------------------------
      // Ethernet deframer for ARM
      //---------------------------------------

      // Add pad of 6 empty bytes to the ethernet packet going from the CPU to the
      // SFP. This padding added before MAC addresses aligns the source and
      // destination IP addresses, UDP headers etc.
      // Note that the xge_mac_wrapper strips this padding to recreate the ethernet
      // packet
      arm_deframer inst_arm_deframer
      (
        .clk(clk),
        .reset(rst),
        .clear(1'b0),

        .s_axis_tdata(s_cpu_tdata),
        .s_axis_tuser(s_cpu_tuser),
        .s_axis_tlast(s_cpu_tlast),
        .s_axis_tvalid(s_cpu_tvalid),
        .s_axis_tready(s_cpu_tready),

        .m_axis_tdata(c2e_tdata),
        .m_axis_tuser(c2e_tuser),
        .m_axis_tlast(c2e_tlast),
        .m_axis_tvalid(c2e_tvalid),
        .m_axis_tready(c2e_tready)
      );
    end else begin
      // Straight pass-through from the CPU port
      assign c2e_tdata    = s_cpu_tdata;
      assign c2e_tuser    = s_cpu_tuser;
      assign c2e_tlast    = s_cpu_tlast;
      assign c2e_tvalid   = s_cpu_tvalid;
      assign s_cpu_tready = c2e_tready;
    end
  endgenerate

  //---------------------------------------
  // X2E and C2E MUX
  //---------------------------------------
  // Input 0 is the framed CHDR stream, input 1 is the CPU stream.
  axi_mux #(
    .SIZE(2), .PRIO(0), .WIDTH(64+4), .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) eth_mux_i (
    .clk(clk), .reset(rst), .clear(1'b0),
    .i_tdata({c2e_tuser, c2e_tdata, x2e_framed_tuser, x2e_framed_tdata}), .i_tlast({c2e_tlast, x2e_framed_tlast}),
    .i_tvalid({c2e_tvalid, x2e_framed_tvalid}), .i_tready({c2e_tready, x2e_framed_tready}),
    .o_tdata({m_mac_tuser, m_mac_tdata}), .o_tlast(m_mac_tlast),
    .o_tvalid(m_mac_tvalid), .o_tready(m_mac_tready)
  );

endmodule // eth_ipv4_chdr64_adapter
`default_nettype wire
diff --git
a/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_dispatch.v b/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_dispatch.v
new file mode 100644
index 000000000..b954a8100
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/xport/eth_ipv4_chdr64_dispatch.v
@@ -0,0 +1,472 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: eth_ipv4_chdr64_dispatch
+// Description:
+//   This module serves as an Ethernet endpoint for CHDR traffic.
+//   Ethernet frames arrive on the s_mac port where they are
+//   inspected and classified as CHDR or !CHDR. A frame contains
+//   CHDR payload if it is addressed to us (Eth and IP), is a UDP
+//   packet and the destination port is the CHDR port.
+//   The UDP payload for CHDR frame is sent out of the m_chdr port
+//   in addition to source information for Eth, IP and UDP. All
+//   other traffic addressed to us (Eth) is sent to the m_cpu port.
+//   Traffic not addressed (Eth) to us is dropped.
+//
+// Parameters:
+//   - DROP_UNKNOWN_MAC: 1 = drop non-broadcast frames whose Eth destination is not my_eth_addr; 0 = keep classifying them
+//
+// Signals:
+//   - s_mac_*: The input Ethernet stream from the MAC (plus tuser for trailing bytes + err)
+//              The tuser bits are the values defined in xge_mac_wrapper. Most
+//              relevant is tuser[3], which signals a bad packet that must be
+//              dropped.
+//   - m_chdr_*: The output CHDR stream to the rfnoc infrastructure
+//   - m_cpu_*: The output Ethernet stream to the CPU (plus tuser for trailing bytes + err)
+//   - my_eth_addr: The Ethernet (MAC) address of this endpoint
+//   - my_ipv4_addr: The IPv4 address of this endpoint
+//   - my_udp_chdr_port: The UDP port allocated for CHDR traffic on this endpoint
+//
+
+`default_nettype none
+module eth_ipv4_chdr64_dispatch #(
+  parameter [0:0] DROP_UNKNOWN_MAC = 1
+)(
+  // Clocking and reset interface
+  input  wire        clk,
+  input  wire        rst,
+  // Input 68bit AXI-Stream interface (from MAC)
+  input  wire [63:0] s_mac_tdata,
+  input  wire [3:0]  s_mac_tuser,
+  input  wire        s_mac_tlast,
+  input  wire        s_mac_tvalid,
+  output wire        s_mac_tready,
+  // Output AXI-Stream interface to CHDR infrastructure
+  output wire [63:0] m_chdr_tdata,
+  output wire [95:0] m_chdr_tuser,
+  output wire        m_chdr_tlast,
+  output wire        m_chdr_tvalid,
+  input  wire        m_chdr_tready,
+  // Output AXI-Stream interface to CPU
+  output wire [63:0] m_cpu_tdata,
+  output wire [3:0]  m_cpu_tuser,
+  output wire        m_cpu_tlast,
+  output wire        m_cpu_tvalid,
+  input  wire        m_cpu_tready,
+  // Device addresses
+  input  wire [47:0] my_eth_addr,
+  input  wire [31:0] my_ipv4_addr,
+  input  wire [15:0] my_udp_chdr_port
+);
+
+  //---------------------------------------
+  // Ethernet/IP/UDP Magic Numbers
+  //---------------------------------------
+
+  localparam [47:0] ETH_ADDR_BCAST = {48{1'b1}};
+  localparam [15:0] ETH_TYPE_IPV4  = 16'h0800;
+  localparam [7:0]  IPV4_PROTO_UDP = 8'h11;
+
+  //---------------------------------------
+  // Byte-swapping function
+  //---------------------------------------
+  function [15:0] bswap16(
+    input [15:0] din
+  );
+    begin
+      bswap16 = {din[0 +: 8], din[8 +: 8]};
+    end
+  endfunction
+
+  function [31:0] bswap32(
+    input [31:0] din
+  );
+    begin
+      bswap32 = {din[0 +: 8], din[8 +: 8],
+                 din[16+: 8], din[24+: 8]};
+    end
+  endfunction
+
+  //---------------------------------------
+  // Input pipeline stage
+  //---------------------------------------
+
+  wire [63:0] in_tdata;
+  wire [3:0]  in_tuser;
+  wire        in_tlast, in_tvalid;
+  reg         in_tready;
+
+  wire [63:0] cpu_tdata;
+  wire [3:0]  cpu_tuser;
+  reg         cpu_tlast, cpu_terror, cpu_tvalid;
+  wire        cpu_tready;
+
+  wire [63:0] chdr_tdata;
+  wire [95:0] chdr_tuser;
+  wire        chdr_tlast, chdr_tvalid, chdr_tready;
+  wire        chdr_terror;
+
+  axi_fifo #(
+    .WIDTH(64+4+1),.SIZE(1)
+  ) in_reg_i (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata({s_mac_tlast, s_mac_tuser, s_mac_tdata}),
+    .i_tvalid(s_mac_tvalid), .i_tready(s_mac_tready),
+    .o_tdata({in_tlast, in_tuser, in_tdata}),
+    .o_tvalid(in_tvalid), .o_tready(in_tready),
+    .space(), .occupied()
+  );
+
+  //---------------------------------------
+  // Classification state machine
+  //---------------------------------------
+
+  localparam [3:0] ST_IDLE_ETH_L0    = 4'd0;
+  localparam [3:0] ST_ETH_L1         = 4'd1;
+  localparam [3:0] ST_ETH_L2_IPV4_L0 = 4'd2;
+  localparam [3:0] ST_IPV4_L1        = 4'd3;
+  localparam [3:0] ST_IPV4_L2        = 4'd4;
+  localparam [3:0] ST_IPV4_UDP_HDR   = 4'd5;
+  localparam [3:0] ST_FWD_CHDR       = 4'd6;
+  localparam [3:0] ST_FWD_CPU        = 4'd7;
+  localparam [3:0] ST_DROP_PKT       = 4'd8;
+
+  // State info (discard_cpu_pkt = 1 requests one tlast+terror beat on the CPU path)
+  reg [3:0]  state = ST_IDLE_ETH_L0;
+  reg        discard_cpu_pkt = 1'b0;
+
+  // Cached fields
+  reg [47:0] eth_dst_addr_cached, eth_src_addr_cached;
+  reg [31:0] ipv4_src_addr_cached;
+  reg [15:0] udp_src_port_cached;
+
+  always @(posedge clk) begin
+    if (rst) begin
+      state <= ST_IDLE_ETH_L0;
+      discard_cpu_pkt <= 1'b0;
+    end else if (in_tvalid && in_tready) begin
+      case (state)
+        // Idle or First line of Eth frame
+        // ----------------------------------
+        // | DstMAC_HI (16) | Preamble (48) |
+        // ----------------------------------
+        ST_IDLE_ETH_L0: begin
+          discard_cpu_pkt <= 1'b0;
+          if (!in_tlast) begin
+            // Just cache addresses. No decisions to be made.
+            eth_dst_addr_cached[47:32] <= bswap16(in_tdata[48 +: 16]);
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else begin
+              state <= ST_ETH_L1;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // Second line of Eth frame
+        // -----------------------------------
+        // | SrcMAC_HI (32) | DstMAC_LO (32) |
+        // -----------------------------------
+        ST_ETH_L1: begin
+          if (!in_tlast) begin
+            // Just cache addresses. No decisions to be made.
+            eth_dst_addr_cached[31:0]  <= bswap32(in_tdata[0 +: 32]);
+            eth_src_addr_cached[47:16] <= bswap32(in_tdata[32 +: 32]);
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else begin
+              state <= ST_ETH_L2_IPV4_L0;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // End Eth frame and start of IP
+        // --------------------------------------------------
+        // | IPv4_Line0 (32)| EthType (16) | SrcMAC_LO (16) |
+        // --------------------------------------------------
+        ST_ETH_L2_IPV4_L0: begin
+          if (!in_tlast) begin
+            eth_src_addr_cached[15:0] <= bswap16(in_tdata[0 +: 16]);
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else if (eth_dst_addr_cached == ETH_ADDR_BCAST) begin
+              // If Eth destination is bcast then fwd to CPU
+              state <= ST_FWD_CPU;
+            end else if (eth_dst_addr_cached != my_eth_addr && DROP_UNKNOWN_MAC) begin
+              // If Eth destination is not us then drop the packet
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else if (bswap16(in_tdata[16 +: 16]) != ETH_TYPE_IPV4) begin
+              // If this is not an IPv4 frame then fwd to CPU
+              state <= ST_FWD_CPU;
+            end else begin
+              // Otherwise continue classification
+              // We know this is an IPv4 frame
+              state <= ST_IPV4_L1;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // Continue IPv4 header
+        // -------------------------------------
+        // | IPv4_Line2 (32) | IPv4_Line1 (32) |
+        // -------------------------------------
+        ST_IPV4_L1: begin
+          if (!in_tlast) begin
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else if (in_tdata[40 +: 8] != IPV4_PROTO_UDP) begin
+              // If this is not a UDP frame then fwd to CPU
+              state <= ST_FWD_CPU;
+            end else begin
+              // Otherwise continue classification
+              // We know this is a UDP frame
+              state <= ST_IPV4_L2;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // Continue IPv4 header
+        // -----------------------------------
+        // | IPDstAddr (32) | IPSrcAddr (32) |
+        // -----------------------------------
+        ST_IPV4_L2: begin
+          if (!in_tlast) begin
+            ipv4_src_addr_cached <= bswap32(in_tdata[0 +: 32]);
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else if (bswap32(in_tdata[32 +: 32]) != my_ipv4_addr) begin
+              // If IPv4 destination is not us then fwd to CPU
+              state <= ST_FWD_CPU;
+            end else begin
+              // Otherwise continue classification
+              // We know this is a UDP frame for us
+              state <= ST_IPV4_UDP_HDR;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // UDP header
+        // -----------------------------------------------------------
+        // | Chksum (16) | Length (16) | DstPort (16) | SrcPort (16) |
+        // -----------------------------------------------------------
+        ST_IPV4_UDP_HDR: begin
+          if (!in_tlast) begin
+            udp_src_port_cached <= bswap16(in_tdata[0 +: 16]);
+            if (in_tuser[3]) begin
+              state <= ST_DROP_PKT;
+              discard_cpu_pkt <= 1'b1;
+            end else if (bswap16(in_tdata[16 +: 16]) == my_udp_chdr_port) begin
+              // The UDP port matches CHDR port
+              state <= ST_FWD_CHDR;
+              discard_cpu_pkt <= 1'b1;
+            end else begin
+              // Not the CHDR port. Forward to CPU
+              state <= ST_FWD_CPU;
+            end
+          end else begin
+            // Short packet: Violates min eth size of 64 bytes
+            state <= ST_IDLE_ETH_L0;
+          end
+        end
+
+        // CHDR Payload
+        ST_FWD_CHDR: begin
+          discard_cpu_pkt <= 1'b0;
+          if (in_tlast)
+            state <= ST_IDLE_ETH_L0;
+        end
+
+        // NotCHDR Payload: Send to CPU
+        ST_FWD_CPU: begin
+          if (in_tlast)
+            state <= ST_IDLE_ETH_L0;
+        end
+
+        // Unwanted Payload: Drop
+        ST_DROP_PKT: begin
+          discard_cpu_pkt <= 1'b0;
+          if (in_tlast)
+            state <= ST_IDLE_ETH_L0;
+        end
+
+        // We should never get here
+        default: begin
+          state <= ST_IDLE_ETH_L0;
+        end
+      endcase
+    end
+  end
+
+  always @(*) begin // Per-state steering of each input beat to the CPU and/or CHDR path
+    case (state)
+      ST_IDLE_ETH_L0: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_ETH_L1: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_ETH_L2_IPV4_L0: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_IPV4_L1: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_IPV4_L2: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_IPV4_UDP_HDR: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = in_tlast;  // Illegal short packet: Drop it
+      end
+      ST_FWD_CHDR: begin // 1st beat (discard_cpu_pkt=1): input, CHDR write and CPU terminator transfer together
+        in_tready  = chdr_tready & (discard_cpu_pkt ? cpu_tready : 1'b1);
+        cpu_tvalid = discard_cpu_pkt & in_tvalid & chdr_tready; // One terminator beat, only when the input beat moves
+        cpu_tlast  = discard_cpu_pkt;
+        cpu_terror = discard_cpu_pkt; // tlast+terror: asks the CPU packet gate to drop the header copy
+      end
+      ST_FWD_CPU: begin
+        in_tready  = cpu_tready;
+        cpu_tvalid = in_tvalid;
+        cpu_tlast  = in_tlast;
+        cpu_terror = 1'b0;
+      end
+      ST_DROP_PKT: begin
+        in_tready  = discard_cpu_pkt ? cpu_tready : 1'b1;
+        cpu_tvalid = discard_cpu_pkt & in_tvalid; // One terminator beat, sent with the first dropped input beat
+        cpu_tlast  = discard_cpu_pkt;
+        cpu_terror = discard_cpu_pkt;
+      end
+      default: begin
+        in_tready  = 1'b0;
+        cpu_tvalid = 1'b0;
+        cpu_tlast  = 1'b0;
+        cpu_terror = 1'b0;
+      end
+    endcase
+  end
+
+  assign cpu_tdata = in_tdata;
+  assign cpu_tuser = in_tuser;
+
+  assign chdr_tdata  = in_tdata;
+  assign chdr_tuser  = {udp_src_port_cached, ipv4_src_addr_cached, eth_src_addr_cached}; // {16, 32, 48} bits
+  assign chdr_tlast  = in_tlast; // NOTE(review): gating below assumes i_tready of axi_fifo/axi_packet_gate is not a function of i_tvalid - confirm
+  assign chdr_tvalid = in_tvalid && (state == ST_FWD_CHDR) && (!discard_cpu_pkt || cpu_tready); // Wait for the CPU terminator so no beat is written twice
+  assign chdr_terror = in_tuser[3];
+
+  //---------------------------------------
+  // Output processing
+  //---------------------------------------
+
+  wire [63:0] o_cpu_tdata;
+  wire [3:0]  o_cpu_tuser;
+  wire        o_cpu_terror, o_cpu_tlast, o_cpu_tvalid, o_cpu_tready;
+
+  axi_fifo #(
+    .WIDTH(64+4+1+1),.SIZE(1)
+  ) out_reg_cpu_i (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata({cpu_tlast, cpu_terror, cpu_tuser, cpu_tdata}),
+    .i_tvalid(cpu_tvalid), .i_tready(cpu_tready),
+    .o_tdata({o_cpu_tlast, o_cpu_terror, o_cpu_tuser, o_cpu_tdata}),
+    .o_tvalid(o_cpu_tvalid), .o_tready(o_cpu_tready),
+    .space(), .occupied()
+  );
+
+  // We cannot make a CHDR/noCHDR routing decision until we are in the middle
+  // of a packet so we use a packet gate for the CPU path because we can rewind
+  // the write pointer and drop the packet in case it's destined for the CHDR
+  // path.
+  // NOTE: The SIZE of this FIFO must be 11 to accommodate a 9000 byte jumbo frame
+  // regardless of the CHDR MTU
+  axi_packet_gate #( .WIDTH(64+4), .SIZE(11), .USE_AS_BUFF(0) ) cpu_out_gate_i (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata({o_cpu_tuser, o_cpu_tdata}), .i_tlast(o_cpu_tlast), .i_terror(o_cpu_terror | o_cpu_tuser[3]),
+    .i_tvalid(o_cpu_tvalid), .i_tready(o_cpu_tready),
+    .o_tdata({m_cpu_tuser, m_cpu_tdata}), .o_tlast(m_cpu_tlast),
+    .o_tvalid(m_cpu_tvalid), .o_tready(m_cpu_tready)
+  );
+
+  wire [63:0] o_chdr_tdata;
+  wire [95:0] o_chdr_tuser;
+  wire        o_chdr_tlast, o_chdr_tvalid, o_chdr_tready;
+  wire        o_chdr_data_tvalid, o_chdr_user_tvalid;
+  wire        o_chdr_data_tready, o_chdr_user_tready;
+
+  axi_fifo #(
+    .WIDTH(96),.SIZE(8)
+  ) chdr_user_fifo_i (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata(chdr_tuser),
+    .i_tvalid(chdr_tvalid & chdr_tready & chdr_tlast & ~chdr_terror), .i_tready(/* Always ready */),
+    .o_tdata(o_chdr_tuser),
+    .o_tvalid(o_chdr_user_tvalid), .o_tready(o_chdr_user_tready),
+    .space(), .occupied()
+  );
+
+  axi_packet_gate #(
+    .WIDTH(64), .SIZE(11), .USE_AS_BUFF(1), .MIN_PKT_SIZE(1)
+  ) chdr_out_gate_i (
+    .clk(clk), .reset(rst), .clear(1'b0),
+    .i_tdata(chdr_tdata), .i_tlast(chdr_tlast), .i_terror(chdr_terror),
+    .i_tvalid(chdr_tvalid), .i_tready(chdr_tready),
+    .o_tdata(o_chdr_tdata), .o_tlast(o_chdr_tlast),
+    .o_tvalid(o_chdr_data_tvalid), .o_tready(o_chdr_data_tready)
+  );
+
+  assign o_chdr_tvalid = o_chdr_data_tvalid & o_chdr_user_tvalid;
+  assign o_chdr_user_tready = o_chdr_tready & o_chdr_data_tvalid & o_chdr_tlast;
+  assign o_chdr_data_tready = o_chdr_tready & o_chdr_user_tvalid;
+
+  chdr_trim_payload #(
+    .CHDR_W(64), .USER_W(96)
+  ) chdr_trim_i (
+    .clk(clk), .rst(rst),
+    .s_axis_tdata(o_chdr_tdata), .s_axis_tuser(o_chdr_tuser),
+    .s_axis_tlast(o_chdr_tlast), .s_axis_tvalid(o_chdr_tvalid), .s_axis_tready(o_chdr_tready),
+    .m_axis_tdata(m_chdr_tdata), .m_axis_tuser(m_chdr_tuser),
+    .m_axis_tlast(m_chdr_tlast), .m_axis_tvalid(m_chdr_tvalid), .m_axis_tready(m_chdr_tready)
+  );
+
+endmodule // eth_ipv4_chdr64_dispatch
+`default_nettype wire
diff --git a/fpga/usrp3/lib/rfnoc/xport/liberio_chdr64_adapter.v b/fpga/usrp3/lib/rfnoc/xport/liberio_chdr64_adapter.v
new file mode 100644
index 000000000..2800c46bb
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/xport/liberio_chdr64_adapter.v
@@ -0,0 +1,120 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments brand
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: liberio_chdr64_adapter
+// Description: The transport adapter for a liberio transport with CHDR_W of
+//   64. A tuser field is used to identify the DMA engine for return routes.
+//   The stream for a given SrcEPID with ID s_dma_tuser will return packets to
+//   that EPID with same ID on m_dma_tuser (after an Advertise management
+//   operation).
+//
+// Parameters:
+//   - PROTOVER: RFNoC protocol version {8'd<major>, 8'd<minor>}
+//   - RT_TBL_SIZE: Log2 of the depth of the return-address routing table
+//   - NODE_INST: The node type to return for a node-info discovery
+//   - DMA_ID_WIDTH: The width of the tuser signal that identifies the DMA engine
+//
+// Signals:
+//   - device_id : The ID of the device that has instantiated this module
+//   - s_dma_*: The input DMA stream from the CPU (plus tuser for source DMA engine ID)
+//   - m_dma_*: The output DMA stream to the CPU (plus tuser for dest DMA engine ID)
+//   - s_chdr_*: The input CHDR stream from the rfnoc infrastructure
+//   - m_chdr_*: The output CHDR stream to the rfnoc infrastructure
+//
+
+module liberio_chdr64_adapter #(
+  parameter [15:0] PROTOVER     = {8'd1, 8'd0},
+  parameter        RT_TBL_SIZE  = 6,
+  parameter        NODE_INST    = 0,
+  parameter        DMA_ID_WIDTH = 8
+)(
+  // Clocking and reset interface
+  input  wire                    clk,
+  input  wire                    rst,
+  // Device info
+  input  wire [15:0]             device_id,
+  // AXI-Stream interface to/from DMA engines
+  input  wire [63:0]             s_dma_tdata,
+  input  wire [DMA_ID_WIDTH-1:0] s_dma_tuser,
+  input  wire                    s_dma_tlast,
+  input  wire                    s_dma_tvalid,
+  output wire                    s_dma_tready,
+  output wire [63:0]             m_dma_tdata,
+  output wire [DMA_ID_WIDTH-1:0] m_dma_tuser,
+  output wire                    m_dma_tlast,
+  output wire                    m_dma_tvalid,
+  input  wire                    m_dma_tready,
+  // AXI-Stream interface to/from CHDR infrastructure
+  input  wire [63:0]             s_chdr_tdata,
+  input  wire                    s_chdr_tlast,
+  input  wire                    s_chdr_tvalid,
+  output wire                    s_chdr_tready,
+  output wire [63:0]             m_chdr_tdata,
+  output wire                    m_chdr_tlast,
+  output wire                    m_chdr_tvalid,
+  input  wire                    m_chdr_tready
+);
+
+  `include "../core/rfnoc_chdr_utils.vh"
+  `include "../core/rfnoc_chdr_internal_utils.vh"
+  `include "rfnoc_xport_types.vh"
+
+  //---------------------------------------
+  // CHDR Transport Adapter
+  //---------------------------------------
+  wire [DMA_ID_WIDTH-1:0] m_axis_xport_tuser;
+
+  chdr_xport_adapter_generic #(
+    .PROTOVER(PROTOVER), .CHDR_W(64),
+    .USER_W(DMA_ID_WIDTH), .TBL_SIZE(RT_TBL_SIZE),
+    .NODE_SUBTYPE(NODE_SUBTYPE_XPORT_LIBERIO_CHDR64), .NODE_INST(NODE_INST)
+  ) xport_adapter_gen_i (
+    .clk                (clk),
+    .rst                (rst),
+    .device_id          (device_id),
+    .s_axis_xport_tdata (s_dma_tdata),
+    .s_axis_xport_tuser (s_dma_tuser),
+    .s_axis_xport_tlast (s_dma_tlast),
+    .s_axis_xport_tvalid(s_dma_tvalid),
+    .s_axis_xport_tready(s_dma_tready),
+    .m_axis_xport_tdata (m_dma_tdata),
+    .m_axis_xport_tuser (m_axis_xport_tuser),
+    .m_axis_xport_tlast (m_dma_tlast),
+    .m_axis_xport_tvalid(m_dma_tvalid),
+    .m_axis_xport_tready(m_dma_tready),
+    .s_axis_rfnoc_tdata (s_chdr_tdata),
+    .s_axis_rfnoc_tlast (s_chdr_tlast),
+    .s_axis_rfnoc_tvalid(s_chdr_tvalid),
+    .s_axis_rfnoc_tready(s_chdr_tready),
+    .m_axis_rfnoc_tdata (m_chdr_tdata),
+    .m_axis_rfnoc_tlast (m_chdr_tlast),
+    .m_axis_rfnoc_tvalid(m_chdr_tvalid),
+    .m_axis_rfnoc_tready(m_chdr_tready),
+    .ctrlport_req_wr    (/* unused */),
+    .ctrlport_req_rd    (/* unused */),
+    .ctrlport_req_addr  (/* unused */),
+    .ctrlport_req_data  (/* unused */),
+    .ctrlport_resp_ack  (/* unused */ 1'b0),
+    .ctrlport_resp_data (/* unused */ 32'd0)
+  );
+
+  // Ensure m_dma_tuser (dest DMA engine ID) does not change for entire packet
+  reg m_hdr = 1'b1; // 1 while the next m_dma beat is the first beat of a packet
+  always @(posedge clk) begin
+    if (rst)
+      m_hdr <= 1'b1;
+    else if (m_dma_tvalid && m_dma_tready)
+      m_hdr <= m_dma_tlast;
+  end
+
+  reg [DMA_ID_WIDTH-1:0] cached_dest = {DMA_ID_WIDTH{1'b0}};
+  always @(posedge clk) begin
+    if (m_hdr) // Track tuser until the first beat is accepted, then hold it
+      cached_dest <= m_axis_xport_tuser;
+  end
+
+  assign m_dma_tuser = m_hdr ? m_axis_xport_tuser : cached_dest;
+
+endmodule // liberio_chdr64_adapter
diff --git a/fpga/usrp3/lib/rfnoc/xport/rfnoc_xport_types.vh b/fpga/usrp3/lib/rfnoc/xport/rfnoc_xport_types.vh
new file mode 100644
index 000000000..5b273a4b4
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/xport/rfnoc_xport_types.vh
@@ -0,0 +1,11 @@
+//
+// Copyright 2019 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+
+// Add all new transport types here
+localparam [7:0] NODE_SUBTYPE_XPORT_GENERIC        = 8'd0;
+localparam [7:0] NODE_SUBTYPE_XPORT_IPV4_CHDR64    = 8'd1;
+localparam [7:0] NODE_SUBTYPE_XPORT_LIBERIO_CHDR64 = 8'd2;
+localparam [7:0] NODE_SUBTYPE_XPORT_NIRIO_CHDR64   = 8'd3; |