From bafa9d95453387814ef25e6b6256ba8db2df612f Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Thu, 23 Jan 2020 16:10:22 -0800 Subject: Merge FPGA repository back into UHD repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FPGA codebase was removed from the UHD repository in 2014 to reduce the size of the repository. However, over the last half-decade, the split between the repositories has proven more burdensome than it has been helpful. By merging the FPGA code back, it will be possible to create atomic commits that touch both FPGA and UHD codebases. Continuous integration testing is also simplified by merging the repositories, because it was previously difficult to automatically derive the correct UHD branch when testing a feature branch on the FPGA repository. This commit also updates the license files and paths therein. We are therefore merging the repositories again. Future development for FPGA code will happen in the same repository as the UHD host code and MPM code. == Original Codebase and Rebasing == The original FPGA repository will be hosted for the foreseeable future at its original local location: https://github.com/EttusResearch/fpga/ It can be used for bisecting, reference, and a more detailed history. The final commit from said repository to be merged here is 05003794e2da61cabf64dd278c45685a7abad7ec. This commit is tagged as v4.0.0.0-pre-uhd-merge. If you have changes in the FPGA repository that you want to rebase onto the UHD repository, simply run the following commands: - Create a directory to store patches (this should be an empty directory): mkdir ~/patches - Now make sure that your FPGA codebase is based on the same state as the code that was merged: cd src/fpga # Or wherever your FPGA code is stored git rebase v4.0.0.0-pre-uhd-merge Note: The rebase command may look slightly different depending on what exactly you're trying to rebase. 
- Create a patch set for your changes versus v4.0.0.0-pre-uhd-merge: git format-patch v4.0.0.0-pre-uhd-merge -o ~/patches Note: Make sure that only patches are stored in your output directory. It should otherwise be empty. Make sure that you picked the correct range of commits, and only commits you wanted to rebase were exported as patch files. - Go to the UHD repository and apply the patches: cd src/uhd # Or wherever your UHD repository is stored git am --directory fpga ~/patches/* rm -rf ~/patches # This is for cleanup == Contributors == The following people have contributed mainly to these files (this list is not complete): Co-authored-by: Alex Williams Co-authored-by: Andrej Rode Co-authored-by: Ashish Chaudhari Co-authored-by: Ben Hilburn Co-authored-by: Ciro Nishiguchi Co-authored-by: Daniel Jepson Co-authored-by: Derek Kozel Co-authored-by: EJ Kreinar Co-authored-by: Humberto Jimenez Co-authored-by: Ian Buckley Co-authored-by: Jörg Hofrichter Co-authored-by: Jon Kiser Co-authored-by: Josh Blum Co-authored-by: Jonathon Pendlum Co-authored-by: Martin Braun Co-authored-by: Matt Ettus Co-authored-by: Michael West Co-authored-by: Moritz Fischer Co-authored-by: Nick Foster Co-authored-by: Nicolas Cuervo Co-authored-by: Paul Butler Co-authored-by: Paul David Co-authored-by: Ryan Marlow Co-authored-by: Sugandha Gupta Co-authored-by: Sylvain Munaut Co-authored-by: Trung Tran Co-authored-by: Vidush Vishwanath Co-authored-by: Wade Fife --- fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs | 25 ++ fpga/usrp3/lib/rfnoc/crossbar/README.pdf | Bin 0 -> 1714398 bytes .../rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v | 288 ++++++++++++ .../lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v | 130 ++++++ .../lib/rfnoc/crossbar/axis_ingress_vc_buff.v | 178 ++++++++ .../lib/rfnoc/crossbar/axis_port_terminator.v | 44 ++ fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v | 164 +++++++ fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v | 381 ++++++++++++++++ .../lib/rfnoc/crossbar/chdr_xb_ingress_buff.v | 
259 +++++++++++ .../lib/rfnoc/crossbar/chdr_xb_routing_table.v | 122 ++++++ fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile | 52 +++ .../crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile | 51 +++ .../axis_ctrl_crossbar_nxn_tb.sv | 26 ++ .../crossbar_tb/chdr_crossbar_nxn_tb/Makefile | 51 +++ .../chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv | 26 ++ .../crossbar/crossbar_tb/chdr_traffic_sink_sim.sv | 150 +++++++ .../crossbar_tb/chdr_traffic_source_sim.sv | 202 +++++++++ .../lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv | 428 ++++++++++++++++++ .../crossbar/crossbar_tb/gen_load_latency_graph.py | 169 ++++++++ .../rfnoc/crossbar/crossbar_tb/run_sim_multi.py | 106 +++++ .../rfnoc/crossbar/gen_node_to_coord_mapping.py | 125 ++++++ .../rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v | 481 +++++++++++++++++++++ .../rfnoc/crossbar/mesh_2d_dor_router_single_sw.v | 398 +++++++++++++++++ fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh | 294 +++++++++++++ .../crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl | 18 + .../crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in | 47 ++ .../rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl | 18 + .../crossbar/synth/chdr_crossbar_nxn_top.v.in | 55 +++ .../crossbar/synth/synth_axis_ctrl_crossbar_nxn.py | 37 ++ .../crossbar/synth/synth_chdr_crossbar_nxn.py | 37 ++ fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py | 67 +++ .../rfnoc/crossbar/torus_2d_dor_router_multi_sw.v | 338 +++++++++++++++ .../rfnoc/crossbar/torus_2d_dor_router_single_sw.v | 294 +++++++++++++ 33 files changed, 5061 insertions(+) create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/README.pdf create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v create mode 100644 
fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv create mode 100755 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py create mode 100755 fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py create mode 100755 fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in create mode 100755 fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py create mode 100755 fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py create mode 100644 
fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v create mode 100644 fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v (limited to 'fpga/usrp3/lib/rfnoc/crossbar') diff --git a/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs new file mode 100644 index 000000000..6fa49cd04 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs @@ -0,0 +1,25 @@ +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# Crossbar Sources +################################################## +RFNOC_XBAR_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/crossbar/, \ +axis_ctrl_crossbar_2d_mesh.v \ +axis_ctrl_crossbar_nxn.v \ +torus_2d_dor_router_single_sw.v \ +mesh_2d_dor_router_single_sw.v \ +axis_ingress_vc_buff.v \ +axis_switch.v \ +axis_port_terminator.v \ +chdr_crossbar_nxn.v \ +chdr_xb_ingress_buff.v \ +chdr_xb_routing_table.v \ +)) + +# Unused sources +# torus_2d_dor_router_multi_sw.v \ +# mesh_2d_dor_router_multi_sw.v \ diff --git a/fpga/usrp3/lib/rfnoc/crossbar/README.pdf b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf new file mode 100644 index 000000000..838702bd1 Binary files /dev/null and b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf differ diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v new file mode 100644 index 000000000..e69bdfe3c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v @@ -0,0 +1,288 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_2d_mesh +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. 
Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers along one dimension (# Nodes = DIM_SIZE * DIM_SIZE) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_2d_mesh #( + parameter DIM_SIZE = 4, + parameter WIDTH = 64, + parameter TOPOLOGY = "MESH", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] s_axis_tdata, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tlast, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tvalid, + output wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tready, + // Output + output wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] m_axis_tdata, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tlast, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tvalid, + input wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + 
+ `include "mesh_node_mapping.vh" + + //------------------------------------------------------- + // Unflatten input and output ports + //------------------------------------------------------- + + wire [WIDTH-1:0] i_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] o_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire clear_routers = deadlock_detected; + + genvar p,x,y; + generate + for (p = 0; p < DIM_SIZE*DIM_SIZE; p=p+1) begin + assign i_tdata_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tdata[p*WIDTH +: WIDTH]; + assign i_tlast_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tlast[p]; + assign i_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tvalid[p]; + assign s_axis_tready[p] = i_tready_arr[node_to_ydst(p)][node_to_xdst(p)] | clear_routers; + + assign m_axis_tdata[p*WIDTH +: WIDTH] = o_tdata_arr[node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tlast[p] = o_tlast_arr [node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tvalid[p] = o_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] & ~clear_routers; + assign o_tready_arr[node_to_ydst(p)][node_to_xdst(p)] = m_axis_tready[p]; + end + endgenerate + + //------------------------------------------------------- + // Instantiate routers + //------------------------------------------------------- + + wire [WIDTH-1:0] e2w_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] w2e_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tlast_arr 
[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] n2s_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] s2n_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + localparam N = DIM_SIZE; + localparam NEND = DIM_SIZE - 1; + localparam [WIDTH-1:0] ZERO = {WIDTH{1'b0}}; + + generate + for (y = 0; y < DIM_SIZE; y=y+1) begin: ydim + for (x = 0; x < DIM_SIZE; x=x+1) begin: xdim + if (TOPOLOGY == "MESH") begin + mesh_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_ter_tdata (i_tdata_arr [y][x]), + .s_axis_ter_tlast (i_tlast_arr [y][x]), + .s_axis_ter_tvalid (i_tvalid_arr[y][x]), + .s_axis_ter_tready (i_tready_arr[y][x]), + .m_axis_ter_tdata (o_tdata_arr [y][x]), + .m_axis_ter_tlast (o_tlast_arr [y][x]), + .m_axis_ter_tvalid (o_tvalid_arr[y][x]), + .m_axis_ter_tready (o_tready_arr[y][x]), + // West connections + .s_axis_wst_tdata ((x != 0) ? e2w_tdata_arr [y][x] : ZERO), + .s_axis_wst_tdest ((x != 0) ? e2w_tdest_arr [y][x] : 1'b0), + .s_axis_wst_tlast ((x != 0) ? e2w_tlast_arr [y][x] : 1'b0), + .s_axis_wst_tvalid ((x != 0) ? 
e2w_tvalid_arr[y][x] : 1'b0), + .s_axis_wst_tready ( e2w_tready_arr[y][x] ), + .m_axis_wst_tdata ( w2e_tdata_arr [y][(x+N-1)%N] ), + .m_axis_wst_tdest ( w2e_tdest_arr [y][(x+N-1)%N] ), + .m_axis_wst_tlast ( w2e_tlast_arr [y][(x+N-1)%N] ), + .m_axis_wst_tvalid ( w2e_tvalid_arr[y][(x+N-1)%N] ), + .m_axis_wst_tready ((x != 0) ? w2e_tready_arr[y][(x+N-1)%N] : 1'b1), + // East connections + .s_axis_est_tdata ((x != NEND) ? w2e_tdata_arr [y][x] : ZERO), + .s_axis_est_tdest ((x != NEND) ? w2e_tdest_arr [y][x] : 1'b0), + .s_axis_est_tlast ((x != NEND) ? w2e_tlast_arr [y][x] : 1'b0), + .s_axis_est_tvalid ((x != NEND) ? w2e_tvalid_arr[y][x] : 1'b0), + .s_axis_est_tready ( w2e_tready_arr[y][x] ), + .m_axis_est_tdata ( e2w_tdata_arr [y][(x+1)%N] ), + .m_axis_est_tdest ( e2w_tdest_arr [y][(x+1)%N] ), + .m_axis_est_tlast ( e2w_tlast_arr [y][(x+1)%N] ), + .m_axis_est_tvalid ( e2w_tvalid_arr[y][(x+1)%N] ), + .m_axis_est_tready ((x != NEND) ? e2w_tready_arr[y][(x+1)%N] : 1'b1), + // North connections + .s_axis_nor_tdata ((y != 0) ? s2n_tdata_arr [y][x] : ZERO), + .s_axis_nor_tdest ((y != 0) ? s2n_tdest_arr [y][x] : 1'b0), + .s_axis_nor_tlast ((y != 0) ? s2n_tlast_arr [y][x] : 1'b0), + .s_axis_nor_tvalid ((y != 0) ? s2n_tvalid_arr[y][x] : 1'b0), + .s_axis_nor_tready ( s2n_tready_arr[y][x] ), + .m_axis_nor_tdata ( n2s_tdata_arr [(y+N-1)%N][x] ), + .m_axis_nor_tdest ( n2s_tdest_arr [(y+N-1)%N][x] ), + .m_axis_nor_tlast ( n2s_tlast_arr [(y+N-1)%N][x] ), + .m_axis_nor_tvalid ( n2s_tvalid_arr[(y+N-1)%N][x] ), + .m_axis_nor_tready ((y != 0) ? n2s_tready_arr[(y+N-1)%N][x] : 1'b1), + // South connections + .s_axis_sou_tdata ((y != NEND) ? n2s_tdata_arr [y][x] : ZERO), + .s_axis_sou_tdest ((y != NEND) ? n2s_tdest_arr [y][x] : 1'b0), + .s_axis_sou_tlast ((y != NEND) ? n2s_tlast_arr [y][x] : 1'b0), + .s_axis_sou_tvalid ((y != NEND) ? 
n2s_tvalid_arr[y][x] : 1'b0), + .s_axis_sou_tready ( n2s_tready_arr[y][x] ), + .m_axis_sou_tdata ( s2n_tdata_arr [(y+1)%N][x] ), + .m_axis_sou_tdest ( s2n_tdest_arr [(y+1)%N][x] ), + .m_axis_sou_tlast ( s2n_tlast_arr [(y+1)%N][x] ), + .m_axis_sou_tvalid ( s2n_tvalid_arr[(y+1)%N][x] ), + .m_axis_sou_tready ((y != NEND) ? s2n_tready_arr[(y+1)%N][x] : 1'b1) + ); + end else begin + torus_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_term_tdata (i_tdata_arr [y][x]), + .s_axis_term_tlast (i_tlast_arr [y][x]), + .s_axis_term_tvalid (i_tvalid_arr[y][x]), + .s_axis_term_tready (i_tready_arr[y][x]), + .m_axis_term_tdata (o_tdata_arr [y][x]), + .m_axis_term_tlast (o_tlast_arr [y][x]), + .m_axis_term_tvalid (o_tvalid_arr[y][x]), + .m_axis_term_tready (o_tready_arr[y][x]), + // X-dim connections + .s_axis_xdim_tdata (e2w_tdata_arr [y][x] ), + .s_axis_xdim_tdest (e2w_tdest_arr [y][x] ), + .s_axis_xdim_tlast (e2w_tlast_arr [y][x] ), + .s_axis_xdim_tvalid (e2w_tvalid_arr[y][x] ), + .s_axis_xdim_tready (e2w_tready_arr[y][x] ), + .m_axis_xdim_tdata (e2w_tdata_arr [y][(x+1)%N]), + .m_axis_xdim_tdest (e2w_tdest_arr [y][(x+1)%N]), + .m_axis_xdim_tlast (e2w_tlast_arr [y][(x+1)%N]), + .m_axis_xdim_tvalid (e2w_tvalid_arr[y][(x+1)%N]), + .m_axis_xdim_tready (e2w_tready_arr[y][(x+1)%N]), + // Y-dim connections + .s_axis_ydim_tdata (s2n_tdata_arr [y][x] ), + .s_axis_ydim_tdest (s2n_tdest_arr [y][x] ), + .s_axis_ydim_tlast (s2n_tlast_arr [y][x] ), + .s_axis_ydim_tvalid (s2n_tvalid_arr[y][x] ), + .s_axis_ydim_tready (s2n_tready_arr[y][x] ), + .m_axis_ydim_tdata (s2n_tdata_arr [(y+1)%N][x]), + .m_axis_ydim_tdest (s2n_tdest_arr [(y+1)%N][x]), + .m_axis_ydim_tlast (s2n_tlast_arr [(y+1)%N][x]), + 
.m_axis_ydim_tvalid (s2n_tvalid_arr[(y+1)%N][x]), + .m_axis_ydim_tready (s2n_tready_arr[(y+1)%N][x]) + ); + end + end + end + endgenerate + + //------------------------------------------------------- + // Deadlock detector + //------------------------------------------------------- + // A deadlock is defined on an AXIS bus as an extended period + // where tvalid=1 but tready=0. If at least one slave port is in + // this state and none of the master ports are then this router + // will go into a failsafe deadlock recovery mode. The DEADLOCK_TIMEOUT + // parameter defines the duration for which this condition has + // to be true. In deadlock recovery mode, all routers are held in reset + // (thus losing all packets in flight) and all input ports are flushed. + + wire m_locked = |(m_axis_tvalid & ~m_axis_tready); + wire s_locked = |(s_axis_tvalid & ~s_axis_tready); + + // A counter that tracks the duration for which the router is livelocked + // If the livelock duration is higher than DEADLOCK_TIMEOUT then it is a + // deadlock + reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_counter = DEADLOCK_TIMEOUT-1; + always @(posedge clk) begin + if (reset | ~(s_locked & ~m_locked)) begin + deadlock_counter <= DEADLOCK_TIMEOUT-1; + end else if (deadlock_counter != 'd0) begin + deadlock_counter <= deadlock_counter - 1; + end + end + + // A counter that tracks the deadlock recovery period. If the slave ports + // have no activity for DEADLOCK_TIMEOUT cycles then the router can + // successfully come out of the deadlocked state. 
+ reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_recover_counter = 'd0; + always @(posedge clk) begin + if (reset) begin + deadlock_recover_counter <= 'd0; + end else if (deadlock_detected) begin + if (|s_axis_tvalid) + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + else + deadlock_recover_counter <= deadlock_recover_counter - 1; + end else if (deadlock_counter == 'd0) begin + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + end + end + assign deadlock_detected = (deadlock_recover_counter != 0); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v new file mode 100644 index 000000000..6de082b4c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v @@ -0,0 +1,130 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_nxn +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - NPORTS: Number of ports (maximum 1024) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. 
Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_nxn #( + parameter WIDTH = 32, + parameter NPORTS = 10, + parameter TOPOLOGY = "TORUS", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(NPORTS*WIDTH)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(NPORTS*WIDTH)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + + function integer csqrt_max1024; + input integer value; + integer i; + begin + csqrt_max1024 = 1; + for (i = 1; i <= 32; i = i + 1) // sqrt(1024) = 32 + csqrt_max1024 = csqrt_max1024 + (i*i < value ? 1 : 0); + end + endfunction + + localparam integer DIM_SIZE = csqrt_max1024(NPORTS); + + wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] i_tdata, o_tdata ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tlast, o_tlast ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tvalid, o_tvalid; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tready, o_tready; + + // axis_ctrl_crossbar_2d_mesh needs to scale up in squares + // i.e. 4, 9, 16, 25, ... but NPORTS can be any number, so + // instantiate the next highest square number of ports and + // terminate the rest. 
+ axis_ctrl_crossbar_2d_mesh #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .TOPOLOGY (TOPOLOGY), + .INGRESS_BUFF_SIZE(INGRESS_BUFF_SIZE), + .ROUTER_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC), + .DEADLOCK_TIMEOUT (DEADLOCK_TIMEOUT) + ) router_dut_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (i_tdata), + .s_axis_tlast (i_tlast), + .s_axis_tvalid (i_tvalid), + .s_axis_tready (i_tready), + .m_axis_tdata (o_tdata), + .m_axis_tlast (o_tlast), + .m_axis_tvalid (o_tvalid), + .m_axis_tready (o_tready), + .deadlock_detected(deadlock_detected) + ); + + // Connect the bottom NPORTS to the IO + assign i_tdata[(NPORTS*WIDTH)-1:0] = s_axis_tdata; + assign i_tlast[NPORTS-1:0] = s_axis_tlast; + assign i_tvalid[NPORTS-1:0] = s_axis_tvalid; + assign s_axis_tready = i_tready[NPORTS-1:0]; + + assign m_axis_tdata = o_tdata[(NPORTS*WIDTH)-1:0]; + assign m_axis_tlast = o_tlast[NPORTS-1:0]; + assign m_axis_tvalid = o_tvalid[NPORTS-1:0]; + assign o_tready[NPORTS-1:0] = m_axis_tready; + + // Terminate the rest + genvar i; + generate for (i = NPORTS; i < (DIM_SIZE*DIM_SIZE); i = i + 1) begin: ports + axis_port_terminator #(.DATA_W(WIDTH)) term_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (o_tdata[(i*WIDTH)+:WIDTH]), + .s_axis_tlast (o_tlast[i]), + .s_axis_tvalid(o_tvalid[i]), + .s_axis_tready(o_tready[i]), + .m_axis_tdata (i_tdata[(i*WIDTH)+:WIDTH]), + .m_axis_tlast (i_tlast[i]), + .m_axis_tvalid(i_tvalid[i]), + .m_axis_tready(i_tready[i]), + .pkts_dropped () + ); + end endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v new file mode 100644 index 000000000..fd10d6682 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v @@ -0,0 +1,178 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ingress_vc_buff +// Description: +// A 
wrapper around a buffer to implement one or more virtual channels +// Supports gating a packet for cut-through routing + +module axis_ingress_vc_buff #( + parameter WIDTH = 64, // Width of the datapath + parameter NUM_VCS = 2, // Number of virtual channels + parameter SIZE = 5, // Virtual channel buffer size + parameter ROUTING = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH} + parameter DEST_W = (NUM_VCS > 1) ? $clog2(NUM_VCS) : 1 // PRIVATE +) ( + input wire clk, + input wire reset, + input wire [WIDTH-1:0] s_axis_tdata, + input wire [DEST_W-1:0] s_axis_tdest, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + output wire [WIDTH-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready +); + + generate if (NUM_VCS > 1) begin + //---------------------------------------------------- + // Multiple virtual channels + //---------------------------------------------------- + + wire [(WIDTH*NUM_VCS)-1:0] bufin_tdata , bufout_tdata ; + wire [NUM_VCS-1:0] bufin_tlast , bufout_tlast ; + wire [NUM_VCS-1:0] bufin_tvalid, bufout_tvalid; + wire [NUM_VCS-1:0] bufin_tready, bufout_tready; + + axi_demux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) vc_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (/* unused */), + .dest (s_axis_tdest ), + .i_tdata (s_axis_tdata ), + .i_tlast (s_axis_tlast ), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .o_tdata (bufin_tdata), + .o_tlast (bufin_tlast), + .o_tvalid (bufin_tvalid), + .o_tready (bufin_tready) + ); + + genvar vc; + for (vc = 0; vc < NUM_VCS; vc = vc + 1) begin + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({bufin_tlast[vc], bufin_tdata [(vc*WIDTH)+:WIDTH]}), + .i_tvalid (bufin_tvalid [vc]), + .i_tready (bufin_tready [vc]), + .o_tdata 
({bufout_tlast[vc], bufout_tdata [(vc*WIDTH)+:WIDTH]}), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufin_tdata[(vc*WIDTH)+:WIDTH]), + .i_tlast (bufin_tlast[vc]), + .i_tvalid (bufin_tvalid[vc]), + .i_tready (bufin_tready[vc]), + .i_terror (1'b0), + .o_tdata (bufout_tdata[(vc*WIDTH)+:WIDTH]), + .o_tlast (bufout_tlast[vc]), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]) + ); + end + end + + axi_mux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) vc_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufout_tdata ), + .i_tlast (bufout_tlast ), + .i_tvalid (bufout_tvalid), + .i_tready (bufout_tready), + .o_tdata (m_axis_tdata ), + .o_tlast (m_axis_tlast ), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready) + ); + + end else begin + //---------------------------------------------------- + // Single virtual channel + //---------------------------------------------------- + wire [WIDTH-1:0] pipe_tdata; + wire pipe_tlast; + wire pipe_tvalid; + wire pipe_tready; + + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({s_axis_tlast, s_axis_tdata}), + .i_tvalid (s_axis_tvalid ), + .i_tready (s_axis_tready ), + .o_tdata ({pipe_tlast, pipe_tdata}), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_tdata), + .i_tlast (s_axis_tlast), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .i_terror (1'b0), + .o_tdata (pipe_tdata), + .o_tlast (pipe_tlast), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready) + ); + end + + axi_fifo #( + 
.WIDTH(WIDTH+1), .SIZE(1) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({pipe_tlast, pipe_tdata}), + .i_tvalid (pipe_tvalid ), + .i_tready (pipe_tready ), + .o_tdata ({m_axis_tlast, m_axis_tdata}), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready), + .space (), + .occupied () + ); + + end endgenerate + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v new file mode 100644 index 000000000..bf9fa24be --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v @@ -0,0 +1,44 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_port_terminator +// Description: +// A dummy terminator for unused crossbar ports + +module axis_port_terminator #( + parameter DATA_W = 64 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Input ports + input wire [DATA_W-1:0] s_axis_tdata, // Input data + input wire s_axis_tlast, // Input EOP (last) + input wire s_axis_tvalid, // Input valid + output wire s_axis_tready, // Input ready + // Output ports + output wire [DATA_W-1:0] m_axis_tdata, // Output data + output wire m_axis_tlast, // Output EOP (last) + output wire m_axis_tvalid, // Output valid + input wire m_axis_tready, // Output ready + // Metrics + output reg [15:0] pkts_dropped +); + + assign s_axis_tready = 1'b1; + assign m_axis_tdata = {DATA_W{1'b0}}; + assign m_axis_tlast = 1'b0; + assign m_axis_tvalid = 1'b0; + + always @(posedge clk) begin + if (reset) begin + pkts_dropped <= 'd0; + end else if (s_axis_tvalid & s_axis_tlast & s_axis_tready) begin + pkts_dropped <= pkts_dropped + 'd1; + end + end + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v new file mode 100644 index 000000000..24b9e4129 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v @@ -0,0 +1,164 @@ 
+// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_switch +// Description: +// Implementation of a M-input, N-output AXI-Stream switch. +// One of the M input ports is allocated based on the s_axis_alloc signal +// and the packet on that port is sent to one of the N output ports based +// on the tdest signal + +module axis_switch #( + parameter DATA_W = 64, // tdata width + parameter DEST_W = 1, // Output tdest width + parameter IN_PORTS = 3, // Number of input ports + parameter OUT_PORTS = 3, // Number of output ports + parameter PIPELINE = 1, // Instantiate output pipeline stage? + parameter ALLOC_W = (IN_PORTS == 1) ? 1 : $clog2(IN_PORTS) //PRIVATE +) ( + // Clocks and resets + input wire clk, // Switch clock + input wire reset, // Reset + // Input ports + input wire [(DATA_W*IN_PORTS)-1:0] s_axis_tdata, // Input data + input wire [((DEST_W+$clog2(OUT_PORTS))*IN_PORTS)-1:0] s_axis_tdest, // Input destination + input wire [IN_PORTS-1:0] s_axis_tlast, // Input EOP (last) + input wire [IN_PORTS-1:0] s_axis_tvalid, // Input valid + output wire [IN_PORTS-1:0] s_axis_tready, // Input ready + input wire [ALLOC_W-1:0] s_axis_alloc, // Input port allocation for switch + // Output ports + output wire [(DATA_W*OUT_PORTS)-1:0] m_axis_tdata, // Output data + output wire [(DEST_W*OUT_PORTS)-1:0] m_axis_tdest, // Output destination + output wire [OUT_PORTS-1:0] m_axis_tlast, // Output EOP (last) + output wire [OUT_PORTS-1:0] m_axis_tvalid, // Output valid + input wire [OUT_PORTS-1:0] m_axis_tready // Output ready +); + // PRIVATE: Vivado synthesizer workaround (cannot be localparam) + localparam CLOG2_IN_PORTS = $clog2(IN_PORTS); + localparam CLOG2_OUT_PORTS = $clog2(OUT_PORTS); + + //--------------------------------------------------------- + // Flatten/unflatten and pipeline + //--------------------------------------------------------- + wire [DATA_W-1:0] i_tdata [0:IN_PORTS-1]; + wire 
[DEST_W+$clog2(OUT_PORTS)-1:0] i_tdest [0:IN_PORTS-1]; + wire i_tlast [0:IN_PORTS-1]; + wire [IN_PORTS-1:0] i_tvalid; + wire [IN_PORTS-1:0] i_tready; + wire [ALLOC_W-1:0] i_alloc; + wire [DATA_W-1:0] o_tdata [0:OUT_PORTS-1]; + wire [DEST_W-1:0] o_tdest [0:OUT_PORTS-1]; + wire o_tlast [0:OUT_PORTS-1]; + wire [OUT_PORTS-1:0] o_tvalid; + wire [OUT_PORTS-1:0] o_tready; + + genvar i, o; + generate + for (i = 0; i < IN_PORTS; i = i + 1) begin: in_ports + assign i_tdata [i] = s_axis_tdata [(i*DATA_W)+:DATA_W]; + assign i_tdest [i] = s_axis_tdest [(i*(DEST_W+CLOG2_OUT_PORTS))+:(DEST_W+CLOG2_OUT_PORTS)]; + assign i_tlast [i] = s_axis_tlast [i]; + assign i_tvalid [i] = s_axis_tvalid[i]; + assign s_axis_tready[i] = i_tready [i]; + end + assign i_alloc = s_axis_alloc; //i_alloc has to be delay matched to valid + + for (o = 0; o < OUT_PORTS; o = o + 1) begin + if (PIPELINE == 1) begin + axi_fifo_flop2 #(.WIDTH(DEST_W+1+DATA_W)) out_pipe_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({o_tdest[o], o_tlast[o], o_tdata[o]}), + .i_tvalid(o_tvalid[o]), .i_tready(o_tready[o]), + .o_tdata({m_axis_tdest[(o*DEST_W)+:DEST_W], m_axis_tlast[o], m_axis_tdata[(o*DATA_W)+:DATA_W]}), + .o_tvalid(m_axis_tvalid[o]), .o_tready(m_axis_tready[o]), + .space(), .occupied() + ); + end else begin + assign m_axis_tdata [(o*DATA_W)+:DATA_W] = o_tdata [o]; + assign m_axis_tdest [(o*DEST_W)+:DEST_W] = o_tdest [o]; + assign m_axis_tlast [o] = o_tlast [o]; + assign m_axis_tvalid[o] = o_tvalid [o]; + assign o_tready [o] = m_axis_tready[o]; + end + end + endgenerate + + //--------------------------------------------------------- + // Allocator + //--------------------------------------------------------- + // The "chosen" input port will drive this bus + wire [DATA_W-1:0] master_tdata; + wire [DEST_W+$clog2(OUT_PORTS)-1:0] master_tdest; + wire master_tlast; + wire master_tvalid; + wire master_tready; + + generate if (IN_PORTS > 1) begin + reg [IN_PORTS-1:0] ialloc_oh; + reg [$clog2(IN_PORTS)-1:0] 
alloc_reg; + always @(posedge clk) begin + if (reset) begin + ialloc_oh <= {IN_PORTS{1'b0}}; + end else begin + if (ialloc_oh == {IN_PORTS{1'b0}}) begin + if (|i_tvalid) begin + ialloc_oh[i_alloc] <= 1'b1; + alloc_reg <= i_alloc; + end + end else begin + if(master_tready & master_tvalid & master_tlast) + ialloc_oh <= {IN_PORTS{1'b0}}; + end + end + end + + assign master_tdata = i_tdata[alloc_reg]; + assign master_tdest = i_tdest[alloc_reg]; + assign master_tlast = i_tlast[alloc_reg]; + assign master_tvalid = |(i_tvalid & ialloc_oh); + assign i_tready = i_tvalid & ialloc_oh & {IN_PORTS{master_tready}}; + end else begin + // Special case: One input port + assign master_tdata = i_tdata[0]; + assign master_tdest = i_tdest[0]; + assign master_tlast = i_tlast[0]; + assign master_tvalid = i_tvalid[0]; + assign i_tready[0] = master_tready; + end endgenerate + + //--------------------------------------------------------- + // Router + //--------------------------------------------------------- + generate if (OUT_PORTS > 1) begin + reg [OUT_PORTS-1:0] odst_oh; + always @(posedge clk) begin + if (reset) begin + odst_oh <= {OUT_PORTS{1'b0}}; + end else begin + if (odst_oh == {OUT_PORTS{1'b0}}) begin + if (master_tvalid) + odst_oh[master_tdest[CLOG2_OUT_PORTS-1:0]] <= 1'b1; + end else begin + if(master_tready & master_tvalid & master_tlast) + odst_oh <= {OUT_PORTS{1'b0}}; + end + end + end + assign master_tready = |(o_tready & odst_oh); + assign o_tvalid = {OUT_PORTS{master_tvalid}} & odst_oh; + end else begin + // Special case: One output port + assign master_tready = o_tready[0]; + assign o_tvalid[0] = master_tvalid; + end endgenerate + + generate for (o = 0; o < OUT_PORTS; o = o + 1) begin + assign o_tdata[o] = master_tdata; + assign o_tdest[o] = master_tdest[DEST_W+CLOG2_OUT_PORTS-1:CLOG2_OUT_PORTS]; + assign o_tlast[o] = master_tlast; + end endgenerate + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v 
b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v new file mode 100644 index 000000000..79f1a6626 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v @@ -0,0 +1,381 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_crossbar_nxn +// Description: +// This module implements a full-bandwidth NxN crossbar with N input and output ports +// for CHDR traffic. It supports multiple optimization strategies for performance, +// area and timing tradeoffs. It uses AXI-Stream for all of its links. The crossbar +// has a dynamic routing table based on a Content Addressable Memory (CAM). The SID +// is used to determine the destination of a packet and the routing table contains +// a re-programmable SID to crossbar port mapping. The table is programmed using +// special route config packets on the data input ports or using an optional +// management port. +// The topology, routing algorithms and the router architecture are +// described in README.pdf in this directory. +// Parameters: +// - CHDR_W: Width of the AXI-Stream data bus +// - NPORTS: Number of ports to instantiate +// - DEFAULT_PORT: The failsafe port to forward a packet to if the SID mapping is missing +// - MTU: log2 of max packet size (in words) +// - ROUTE_TBL_SIZE: log2 of the number of mappings that the routing table can hold +// at any time. Mapping values are maintained in a FIFO fashion. +// - MUX_ALLOC: Algorithm to allocate the egress MUX +// * PRIO: Priority based. 
Lower port numbers have a higher priority +// * ROUND-ROBIN: Round robin input port allocation +// - OPTIMIZE: Optimization strategy for performance vs area vs timing tradeoffs +// * AREA: Attempt to minimize area at the cost of performance (throughput) and/or timing +// * PERFORMANCE: Attempt to maximize performance at the cost of area and/or timing +// * TIMING: Attempt to maximize Fmax at the cost of area and/or performance +// - NPORTS_MGMT: Number of ports with management endpoint. The first NPORTS_MGMT ports will +// have the management port instantiated +// - EXT_RTCFG_PORT: Enable a side-channel AXI-Stream management port to configure the +// routing table +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// - s_axis_mgmt_*: Management slave port +// - device_id: The ID of the device that has instantiated this module +// + +module chdr_crossbar_nxn #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter CHDR_W = 64, + parameter [7:0] NPORTS = 8, + parameter [7:0] DEFAULT_PORT = 0, + parameter MTU = 9, + parameter ROUTE_TBL_SIZE = 6, + parameter MUX_ALLOC = "ROUND-ROBIN", + parameter OPTIMIZE = "AREA", + parameter [7:0] NPORTS_MGMT = NPORTS, + parameter [0:0] EXT_RTCFG_PORT = 0 +) ( + input wire clk, + input wire reset, + // Device info + input wire [15:0] device_id, + // Inputs + input wire [(CHDR_W*NPORTS)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(CHDR_W*NPORTS)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Router config management port + input wire ext_rtcfg_stb, + input wire [15:0] ext_rtcfg_addr, + input wire [31:0] ext_rtcfg_data, + output wire ext_rtcfg_ack +); + // --------------------------------------------------- + // RFNoC Includes + // 
--------------------------------------------------- + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + localparam NPORTS_W = $clog2(NPORTS); + localparam EPID_W = 16; + localparam [17:0] EXT_INFO = {1'b0, EXT_RTCFG_PORT, NPORTS_MGMT, NPORTS}; + + localparam [0:0] PKT_ST_HEAD = 1'b0; + localparam [0:0] PKT_ST_BODY = 1'b1; + + // The compute_mux_alloc function is the switch allocation function for the MUX + // i.e. it chooses which input port reserves the output MUX for packet transfer. + function [NPORTS_W-1:0] compute_mux_alloc; + input [NPORTS-1:0] pkt_waiting; + input [NPORTS_W-1:0] last_alloc; + reg signed [NPORTS_W:0] i; + begin + compute_mux_alloc = last_alloc; + for (i = NPORTS-1; i >= 0; i=i-1) begin + if (MUX_ALLOC == "PRIO") begin + // Priority. Lower port index gets a higher priority. + if (pkt_waiting[i]) + compute_mux_alloc = i; + end else begin + // Round-robin + if (pkt_waiting[(last_alloc + i + 1) % NPORTS]) + compute_mux_alloc = (last_alloc + i + 1) % NPORTS; + end + end + end + endfunction + + wire [NPORTS-1:0] rtcfg_req_wr; + wire [(16*NPORTS)-1:0] rtcfg_req_addr; + wire [(32*NPORTS)-1:0] rtcfg_req_data; + wire [NPORTS-1:0] rtcfg_resp_ack; + wire [(EPID_W*NPORTS)-1:0] find_tdata; + wire [NPORTS-1:0] find_tvalid; + wire [NPORTS-1:0] find_tready; + wire [(NPORTS_W*NPORTS)-1:0] result_tdata; + wire [NPORTS-1:0] result_tkeep; + wire [NPORTS-1:0] result_tvalid; + wire [NPORTS-1:0] result_tready; + + // Instantiate a single CAM-based routing table that will be shared between all + // input ports. Configuration and lookup is performed using an AXI-Stream iface. + // If multiple packets arrive simultaneously, only the headers of those packets will + // be serialized in order to arbitrate this map. Selection is done round-robin. 
+ chdr_xb_routing_table #( + .SIZE(ROUTE_TBL_SIZE), .NPORTS(NPORTS), + .EXT_INS_PORT_EN(EXT_RTCFG_PORT) + ) routing_tbl_i ( + .clk (clk ), + .reset (reset ), + .port_req_wr (rtcfg_req_wr ), + .port_req_addr (rtcfg_req_addr), + .port_req_data (rtcfg_req_data), + .port_resp_ack (rtcfg_resp_ack), + .ext_req_wr (ext_rtcfg_stb ), + .ext_req_addr (ext_rtcfg_addr), + .ext_req_data (ext_rtcfg_data), + .ext_resp_ack (ext_rtcfg_ack ), + .axis_find_tdata (find_tdata ), + .axis_find_tvalid (find_tvalid ), + .axis_find_tready (find_tready ), + .axis_result_tdata (result_tdata ), + .axis_result_tkeep (result_tkeep ), + .axis_result_tvalid(result_tvalid ), + .axis_result_tready(result_tready ) + ); + + wire [CHDR_W-1:0] i_tdata [0:NPORTS-1]; + wire [9:0] i_tdest [0:NPORTS-1]; + wire [1:0] i_tid [0:NPORTS-1]; + wire i_tlast [0:NPORTS-1]; + wire i_tvalid [0:NPORTS-1]; + wire i_tready [0:NPORTS-1]; + wire [CHDR_W-1:0] buf_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] buf_tdest [0:NPORTS-1], buf_tdest_tmp[0:NPORTS-1]; + wire buf_tkeep [0:NPORTS-1]; + wire buf_tlast [0:NPORTS-1]; + wire buf_tvalid[0:NPORTS-1]; + wire buf_tready[0:NPORTS-1]; + wire [CHDR_W-1:0] swi_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] swi_tdest [0:NPORTS-1]; + wire swi_tlast [0:NPORTS-1]; + wire swi_tvalid[0:NPORTS-1]; + wire swi_tready[0:NPORTS-1]; + wire [(CHDR_W*NPORTS)-1:0] swo_tdata [0:NPORTS-1], muxi_tdata [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tlast [0:NPORTS-1], muxi_tlast [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tvalid[0:NPORTS-1], muxi_tvalid[0:NPORTS-1]; + wire [NPORTS-1:0] swo_tready[0:NPORTS-1], muxi_tready[0:NPORTS-1]; + + genvar n, i, j; + generate + for (n = 0; n < NPORTS; n = n + 1) begin: i_ports + // For each input port, first check if we have a management packet + // arriving. If it arrives, the top config commands are extracted, sent to the + // routing table for configuration, and the rest of the packet is forwarded + // down to + // the router. 
+ if (n < NPORTS_MGMT) begin + chdr_mgmt_pkt_handler #( + .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .MGMT_ONLY(0) + ) mgmt_ep_i ( + .clk (clk ), + .rst (reset ), + .node_info (chdr_mgmt_build_node_info(EXT_INFO, n, NODE_TYPE_XBAR, device_id)), + .s_axis_chdr_tdata (s_axis_tdata [(n*CHDR_W)+:CHDR_W] ), + .s_axis_chdr_tlast (s_axis_tlast [n] ), + .s_axis_chdr_tvalid (s_axis_tvalid[n] ), + .s_axis_chdr_tready (s_axis_tready[n] ), + .s_axis_chdr_tuser ('d0 ), + .m_axis_chdr_tdata (i_tdata [n] ), + .m_axis_chdr_tdest (i_tdest [n] ), + .m_axis_chdr_tid (i_tid [n] ), + .m_axis_chdr_tlast (i_tlast [n] ), + .m_axis_chdr_tvalid (i_tvalid [n] ), + .m_axis_chdr_tready (i_tready [n] ), + .ctrlport_req_wr (rtcfg_req_wr [n] ), + .ctrlport_req_rd (/* unused */ ), + .ctrlport_req_addr (rtcfg_req_addr[(n*16)+:16] ), + .ctrlport_req_data (rtcfg_req_data[(n*32)+:32] ), + .ctrlport_resp_ack (rtcfg_resp_ack[n] ), + .ctrlport_resp_data (32'h0 /* unused */ ), + .op_stb (/* unused */ ), + .op_dst_epid (/* unused */ ), + .op_src_epid (/* unused */ ), + .op_data (/* unused */ ) + ); + end else begin + assign i_tdata [n] = s_axis_tdata [(n*CHDR_W)+:CHDR_W]; + assign i_tid [n] = CHDR_MGMT_ROUTE_EPID; + assign i_tdest [n] = 10'd0; // Unused + assign i_tlast [n] = s_axis_tlast [n]; + assign i_tvalid [n] = s_axis_tvalid[n]; + assign s_axis_tready[n] = i_tready [n]; + + assign rtcfg_req_wr [n] = 1'b0; + assign rtcfg_req_addr[(n*16)+:16] = 16'h0; + assign rtcfg_req_data[(n*32)+:32] = 32'h0; + end + + // Ingress buffer module that does the following: + // - Stores and gates an incoming packet + // - Looks up destination in routing table and attaches a tdest for the packet + chdr_xb_ingress_buff #( + .WIDTH(CHDR_W), .MTU(MTU), .DEST_W(NPORTS_W), .NODE_ID(n) + ) buf_i ( + .clk (clk ), + .reset (reset ), + .s_axis_chdr_tdata (i_tdata [n] ), + .s_axis_chdr_tdest (i_tdest [n][NPORTS_W-1:0] ), + .s_axis_chdr_tid (i_tid [n] ), + .s_axis_chdr_tlast (i_tlast [n] ), + .s_axis_chdr_tvalid (i_tvalid [n] ), + 
.s_axis_chdr_tready (i_tready [n] ), + .m_axis_chdr_tdata (buf_tdata [n] ), + .m_axis_chdr_tdest (buf_tdest_tmp[n] ), + .m_axis_chdr_tkeep (buf_tkeep [n] ), + .m_axis_chdr_tlast (buf_tlast [n] ), + .m_axis_chdr_tvalid (buf_tvalid [n] ), + .m_axis_chdr_tready (buf_tready [n] ), + .m_axis_find_tdata (find_tdata [(n*EPID_W)+:EPID_W] ), + .m_axis_find_tvalid (find_tvalid [n] ), + .m_axis_find_tready (find_tready [n] ), + .s_axis_result_tdata (result_tdata [(n*NPORTS_W)+:NPORTS_W]), + .s_axis_result_tkeep (result_tkeep [n] ), + .s_axis_result_tvalid(result_tvalid[n] ), + .s_axis_result_tready(result_tready[n] ) + ); + assign buf_tdest[n] = buf_tkeep[n] ? buf_tdest_tmp[n] : DEFAULT_PORT[NPORTS_W-1:0]; + + // Pipeline state + axi_fifo #( + .WIDTH(CHDR_W+1+NPORTS_W), .SIZE(1) + ) pipe_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata ({buf_tlast[n], buf_tdest[n], buf_tdata[n]}), + .i_tvalid (buf_tvalid[n] ), + .i_tready (buf_tready[n] ), + .o_tdata ({swi_tlast[n], swi_tdest[n], swi_tdata[n]}), + .o_tvalid (swi_tvalid[n] ), + .o_tready (swi_tready[n] ), + .space (/* Unused */ ), + .occupied (/* Unused */ ) + ); + + // Ingress demux. 
Use the tdest field to determine packet destination + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(1), .OUT_PORTS(NPORTS), .PIPELINE(1) + ) demux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (swi_tdata [n] ), + .s_axis_tdest ({1'b0, swi_tdest [n]}), + .s_axis_tlast (swi_tlast [n] ), + .s_axis_tvalid (swi_tvalid[n] ), + .s_axis_tready (swi_tready[n] ), + .s_axis_alloc (1'b0 ), + .m_axis_tdata (swo_tdata [n] ), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (swo_tlast [n] ), + .m_axis_tvalid (swo_tvalid[n] ), + .m_axis_tready (swo_tready[n] ) + ); + end + + for (i = 0; i < NPORTS; i = i + 1) begin + for (j = 0; j < NPORTS; j = j + 1) begin + assign muxi_tdata [i][j*CHDR_W+:CHDR_W] = swo_tdata [j][i*CHDR_W+:CHDR_W]; + assign muxi_tlast [i][j] = swo_tlast [j][i]; + assign muxi_tvalid[i][j] = swo_tvalid [j][i]; + assign swo_tready [i][j] = muxi_tready[j][i]; + end + end + + for (n = 0; n < NPORTS; n = n + 1) begin: o_ports + if (OPTIMIZE == "PERFORMANCE") begin + // Use the axis_switch module when optimizing for performance + // This logic has some extra levels of logic to ensure + // that the switch allocation happens in 0 clock cycles which + // means that Fmax for this implementation will be lower. + + wire mux_ready = |muxi_tready[n]; // Max 1 bit should be high + wire mux_valid = |muxi_tvalid[n]; + wire mux_last = |(muxi_tvalid[n] & muxi_tlast[n]); + + // Track the input packet state + reg [0:0] pkt_state = PKT_ST_HEAD; + always @(posedge clk) begin + if (reset) begin + pkt_state <= PKT_ST_HEAD; + end else if (mux_valid & mux_ready) begin + pkt_state <= mux_last ? PKT_ST_HEAD : PKT_ST_BODY; + end + end + + // The switch requires the allocation to stay valid until the + // end of the packet. 
We also might need to keep the previous + // packet's allocation to compute the current one + reg [NPORTS_W-1:0] prev_sw_alloc = {NPORTS_W{1'b0}}; + reg [NPORTS_W-1:0] pkt_sw_alloc = {NPORTS_W{1'b0}}; + wire [NPORTS_W-1:0] muxi_sw_alloc = (mux_valid && pkt_state == PKT_ST_HEAD) ? + compute_mux_alloc(muxi_tvalid[n], prev_sw_alloc) : pkt_sw_alloc; + + always @(posedge clk) begin + if (reset) begin + prev_sw_alloc <= {NPORTS_W{1'b0}}; + pkt_sw_alloc <= {NPORTS_W{1'b0}}; + end else if (mux_valid & mux_ready) begin + if (pkt_state == PKT_ST_HEAD) + pkt_sw_alloc <= muxi_sw_alloc; + if (mux_last) + prev_sw_alloc <= muxi_sw_alloc; + end + end + + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(NPORTS), .OUT_PORTS(1), + .PIPELINE(0) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (muxi_tdata [n] ), + .s_axis_tdest ({NPORTS{1'b0}} /* Unused */ ), + .s_axis_tlast (muxi_tlast [n] ), + .s_axis_tvalid (muxi_tvalid[n] ), + .s_axis_tready (muxi_tready[n] ), + .s_axis_alloc (muxi_sw_alloc ), + .m_axis_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (m_axis_tlast [n] ), + .m_axis_tvalid (m_axis_tvalid[n] ), + .m_axis_tready (m_axis_tready[n] ) + ); + end else begin + // axi_mux has an additional bubble cycle but the logic + // to allocate an input port has fewer levels and takes + // up fewer resources. + axi_mux #( + .PRIO(MUX_ALLOC == "PRIO"), .WIDTH(CHDR_W), .SIZE(NPORTS), + .PRE_FIFO_SIZE(OPTIMIZE == "TIMING" ? 
1 : 0), .POST_FIFO_SIZE(1) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata (muxi_tdata [n] ), + .i_tlast (muxi_tlast [n] ), + .i_tvalid (muxi_tvalid [n] ), + .i_tready (muxi_tready [n] ), + .o_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .o_tlast (m_axis_tlast [n] ), + .o_tvalid (m_axis_tvalid[n] ), + .o_tready (m_axis_tready[n] ) + ); + end + end + endgenerate + + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v new file mode 100644 index 000000000..dcb11da8e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v @@ -0,0 +1,259 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_ingress_buff +// +// Description: +// +// Ingress buffer module for the CHDR crossbar. This module stores and gates +// the incoming packet and simultaneously determines the destination (TDEST) +// by inspecting the incoming TID. If the TID is CHDR_MGMT_ROUTE_EPID then we +// perform a lookup on the destination EPID to determine the correct output for TDEST. 
+// +// Parameters: +// +// WIDTH : Data width of the CHDR interfaces (TDATA) +// MTU : Maximum transmission unit, in WIDTH-sized words, is 2**MTU +// DEST_W : Width of the destination routing information (TDEST) +// NODE_ID : Numeric identifier for this port +// + +module chdr_xb_ingress_buff #( + parameter WIDTH = 64, + parameter MTU = 5, + parameter DEST_W = 4, + parameter [9:0] NODE_ID = 0 +) ( + input wire clk, + input wire reset, + // CHDR input port + input wire [WIDTH-1:0] s_axis_chdr_tdata, + input wire [DEST_W-1:0] s_axis_chdr_tdest, + input wire [1:0] s_axis_chdr_tid, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // CHDR output port (with a tdest and tkeep) + output wire [WIDTH-1:0] m_axis_chdr_tdata, + output wire [DEST_W-1:0] m_axis_chdr_tdest, + output wire m_axis_chdr_tkeep, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Find port going to routing table + output wire [15:0] m_axis_find_tdata, + output wire m_axis_find_tvalid, + input wire m_axis_find_tready, + // Result port from routing table + input wire [DEST_W-1:0] s_axis_result_tdata, + input wire s_axis_result_tkeep, + input wire s_axis_result_tvalid, + output wire s_axis_result_tready +); + + // RFNoC Includes + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + + //--------------------------------------------------------------------------- + // Packet Buffer + //--------------------------------------------------------------------------- + + wire [WIDTH-1:0] gate_i_tdata , gate_o_tdata ; + wire gate_i_tlast , gate_o_tlast ; + wire gate_i_tvalid, gate_o_tvalid; + wire gate_i_tready, gate_o_tready; + + // The axi_packet_gate queues up an entire packet before letting it go out. + // This reduces congestion in the crossbar for slowly-built packets. 
+ axi_packet_gate #( + .WIDTH (WIDTH), + .SIZE (MTU) + ) axi_packet_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (gate_i_tdata), + .i_tlast (gate_i_tlast), + .i_terror (1'b0), + .i_tvalid (gate_i_tvalid), + .i_tready (gate_i_tready), + .o_tdata (gate_o_tdata), + .o_tlast (gate_o_tlast), + .o_tvalid (gate_o_tvalid), + .o_tready (gate_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Destination (TDEST) Muxing + //--------------------------------------------------------------------------- + + wire [15:0] find_tdata; + wire find_tvalid, find_tready; + + wire [DEST_W-1:0] dest_i_tdata; + wire dest_i_tkeep, dest_i_tvalid, dest_i_tready; + wire [DEST_W-1:0] dest_o_tdata; + wire dest_o_tkeep, dest_o_tvalid, dest_o_tready; + + // The find_fifo holds the lookup requests from the find_* AXI stream and + // sends them on to the m_axis_find_* stream port. It is required because the + // input logic (see below) doesn't obey the AXI handshake protocol but this + // FIFO can tolerate it. + axi_fifo #( + .WIDTH (16), + .SIZE (1) + ) find_fifo_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (find_tdata), + .i_tvalid (find_tvalid), + .i_tready (find_tready), + .o_tdata (m_axis_find_tdata), + .o_tvalid (m_axis_find_tvalid), + .o_tready (m_axis_find_tready), + .space (), + .occupied () + ); + + // The destination (TDEST) can come from two sources: Directly from the + // packet info (in which case TDEST was immediately determined and comes in + // on dest_* AXI stream) or via a lookup (in which case the result comes in + // on s_axis_result_*). Only one of these data paths is used at a time, so we + // mux them together here to create a single stream (dest_o_*) that contains the + // destination for the next packet. 
+ axi_mux #( + .WIDTH (DEST_W+1), + .SIZE (2), + .PRIO (1), + .PRE_FIFO_SIZE (1), + .POST_FIFO_SIZE (1) + ) dest_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({dest_i_tkeep, dest_i_tdata, + s_axis_result_tkeep, s_axis_result_tdata}), + .i_tlast (2'b11), + .i_tvalid ({dest_i_tvalid, s_axis_result_tvalid}), + .i_tready ({dest_i_tready, s_axis_result_tready}), + .o_tdata ({dest_o_tkeep, dest_o_tdata}), + .o_tlast (), + .o_tvalid (dest_o_tvalid), + .o_tready (dest_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Input Logic + //--------------------------------------------------------------------------- + // + // When a packet comes in, we may have to do one of the following: + // 1) Look up the TDEST using the EPID + // 2) Use the specified input TDEST + // 3) Use the NODE_ID as the TDEST (to return the packet) + // + //--------------------------------------------------------------------------- + + // The s_axis_chdr_hdr_valid signal indicates when TDATA and TID contain the + // header information for the current packet. + reg s_axis_chdr_hdr_valid = 1'b1; + + always @(posedge clk) begin + if (reset) begin + s_axis_chdr_hdr_valid <= 1'b1; + end else if (s_axis_chdr_tvalid & s_axis_chdr_tready) begin + s_axis_chdr_hdr_valid <= s_axis_chdr_tlast; + end + end + + // The dest_find_tready signal indicates if the find_fifo is ready or if the + // dest port of the dest_mux is ready, depending on which path will be used. 
+ reg dest_find_tready; + + always @(*) begin + if (s_axis_chdr_hdr_valid) begin + case (s_axis_chdr_tid) + CHDR_MGMT_ROUTE_EPID: + dest_find_tready = find_tready; + CHDR_MGMT_ROUTE_TDEST: + dest_find_tready = dest_i_tready; + CHDR_MGMT_RETURN_TO_SRC: + dest_find_tready = dest_i_tready; + default: + dest_find_tready = dest_i_tready; // We should never get here + endcase + end else begin + dest_find_tready = 1'b1; + end + end + + // We can accept a transfer from the input CHDR stream only if the packet + // gate and dest/find datapaths are ready. + assign s_axis_chdr_tready = s_axis_chdr_tvalid && + gate_i_tready && + dest_find_tready; + + // The chdr_header_stb signal indicates when we write data into the dest/find + // data path. This happens when we're accepting the header word of the packet + // into the packet gate. + wire chdr_header_stb = s_axis_chdr_tvalid && + s_axis_chdr_tready && + s_axis_chdr_hdr_valid; + + // ************************************************************************** + // WARNING: The logic below violates AXI-Stream by having a tready -> tvalid + // dependency. To ensure no deadlocks, we must place FIFOs downstream + // of gate_i_*, find_* and dest_i_*. + + // Here we decide if we need to do a lookup using the find_* path or if the + // destination is known and can be put directly on the dest_* path. + // + // Start a lookup request if the TID is CHDR_MGMT_ROUTE_EPID. + assign find_tdata = chdr_get_dst_epid(s_axis_chdr_tdata[63:0]); + assign find_tvalid = chdr_header_stb && + (s_axis_chdr_tid == CHDR_MGMT_ROUTE_EPID); + // Set TDEST directly if TID is CHDR_MGMT_ROUTE_TDEST or + // CHDR_MGMT_RETURN_TO_SRC. + assign dest_i_tdata = (s_axis_chdr_tid == CHDR_MGMT_ROUTE_TDEST) ? 
+ s_axis_chdr_tdest : NODE_ID[DEST_W-1:0]; + assign dest_i_tkeep = 1'b1; + assign dest_i_tvalid = chdr_header_stb && + (s_axis_chdr_tid != CHDR_MGMT_ROUTE_EPID); + + // Input logic for axi_packet_gate + assign gate_i_tdata = s_axis_chdr_tdata; + assign gate_i_tlast = s_axis_chdr_tlast; + assign gate_i_tvalid = s_axis_chdr_tready && s_axis_chdr_tvalid; + + // + // ************************************************************************** + + + //--------------------------------------------------------------------------- + // Output Logic + //--------------------------------------------------------------------------- + // + // The destination for the packet (TDEST) must be valid before we allow the + // header of the packet to pass through. So the packet must be blocked until + // the output of the dest_o_* is valid. TDEST and TKEEP must remain valid + // until the end of the packet. + // + //--------------------------------------------------------------------------- + + assign m_axis_chdr_tdata = gate_o_tdata; + assign m_axis_chdr_tlast = gate_o_tlast; + assign m_axis_chdr_tdest = dest_o_tdata; + assign m_axis_chdr_tkeep = dest_o_tkeep; + assign m_axis_chdr_tvalid = gate_o_tvalid && dest_o_tvalid; + + assign gate_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready; + assign dest_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready && m_axis_chdr_tlast; + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v new file mode 100644 index 000000000..f445efc68 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v @@ -0,0 +1,122 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_routing_table +// Description: +// A routing table for the CHDR crossbar. This table is designed +// to be shared between all ports. 
It has an AXI-Stream lookup +// interface and a ctrlport (reduced) configuration interface. + +module chdr_xb_routing_table #( + parameter SIZE = 6, + parameter NPORTS = 4, + parameter EXT_INS_PORT_EN = 1 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Insertion Interface (for XB ports) + input wire [NPORTS-1:0] port_req_wr, + input wire [(16*NPORTS)-1:0] port_req_addr, + input wire [(32*NPORTS)-1:0] port_req_data, + output wire [NPORTS-1:0] port_resp_ack, + // Insertion Interface (External) + input wire ext_req_wr, + input wire [15:0] ext_req_addr, + input wire [31:0] ext_req_data, + output wire ext_resp_ack, + // Find Interface + input wire [(16*NPORTS)-1:0] axis_find_tdata, + input wire [NPORTS-1:0] axis_find_tvalid, + output wire [NPORTS-1:0] axis_find_tready, + // Result Interface (for Find) + output wire [($clog2(NPORTS)*NPORTS)-1:0] axis_result_tdata, + output wire [NPORTS-1:0] axis_result_tkeep, + output wire [NPORTS-1:0] axis_result_tvalid, + input wire [NPORTS-1:0] axis_result_tready +); + localparam NPORTS_W = $clog2(NPORTS); + localparam CFG_W = NPORTS_W + 16; + localparam CFG_PORTS = NPORTS + EXT_INS_PORT_EN; + + // CAM-based lookup table + + wire [15:0] insert_tdest; + wire [NPORTS_W-1:0] insert_tdata; + wire insert_tvalid; + wire insert_tready; + + axis_muxed_kv_map #( + .KEY_WIDTH(16), .VAL_WIDTH(NPORTS_W), + .SIZE(SIZE), .NUM_PORTS(NPORTS) + ) kv_map_i ( + .clk (clk ), + .reset (reset ), + .axis_insert_tdata (insert_tdata ), + .axis_insert_tdest (insert_tdest ), + .axis_insert_tvalid(insert_tvalid ), + .axis_insert_tready(insert_tready ), + .axis_find_tdata (axis_find_tdata ), + .axis_find_tvalid (axis_find_tvalid ), + .axis_find_tready (axis_find_tready ), + .axis_result_tdata (axis_result_tdata ), + .axis_result_tkeep (axis_result_tkeep ), + .axis_result_tvalid(axis_result_tvalid), + .axis_result_tready(axis_result_tready) + ); + + // Logic to convert from ctrlport to AXI-Stream + + wire ins_req_wr [0:CFG_PORTS-1]; + wire 
[15:0] ins_req_addr[0:CFG_PORTS-1]; + wire [NPORTS_W-1:0] ins_req_data[0:CFG_PORTS-1]; + wire ins_resp_ack[0:CFG_PORTS-1]; + + reg [(CFG_PORTS*CFG_W)-1:0] cfg_tdata; + reg [CFG_PORTS-1:0] cfg_tvalid = {CFG_PORTS{1'b0}}; + wire [CFG_PORTS-1:0] cfg_tready; + + genvar i; + generate for (i = 0; i < CFG_PORTS; i=i+1) begin + assign ins_req_wr [i] = (i < NPORTS) ? port_req_wr[i] : ext_req_wr; + assign ins_req_addr[i] = (i < NPORTS) ? port_req_addr[i*16 +: 16] : ext_req_addr; + assign ins_req_data[i] = (i < NPORTS) ? port_req_data[i*32 +: NPORTS_W] : ext_req_data[NPORTS_W-1:0]; + if (i < NPORTS) + assign port_resp_ack[i] = ins_resp_ack[i]; + else + assign ext_resp_ack = ins_resp_ack[i]; + + always @(posedge clk) begin + if (reset) begin + cfg_tvalid[i] <= 1'b0; + end else begin + if (~cfg_tvalid[i]) begin + if (ins_req_wr[i]) begin + cfg_tvalid[i] <= 1'b1; + cfg_tdata[(CFG_W*i) +: CFG_W] <= {ins_req_data[i], ins_req_addr[i]}; + end + end else begin + cfg_tvalid[i] <= ~cfg_tready[i]; + end + end + end + assign ins_resp_ack[i] = cfg_tvalid[i] & cfg_tready[i]; + end endgenerate + + // Multiplexer between XB ports and external cfg. i_tlast is one bit per mux input (CFG_PORTS wide); all ones makes every insert a single-word packet. + + axi_mux #( + .WIDTH(CFG_W), .SIZE(CFG_PORTS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) rtcfg_mux_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata(cfg_tdata), .i_tlast({CFG_PORTS{1'b1}}), + .i_tvalid(cfg_tvalid), .i_tready(cfg_tready), + .o_tdata({insert_tdata, insert_tdest}), .o_tlast(), + .o_tvalid(insert_tvalid), .o_tready(insert_tready) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile new file mode 100644 index 000000000..7fa7ae03b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile @@ -0,0 +1,52 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = 
$(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (//) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +TB_TOP_MODULE ?= crossbar_tb +SIM_TOP = $(TB_TOP_MODULE) + +SIM_SRCS = \ +$(abspath chdr_traffic_source_sim.sv) \ +$(abspath chdr_traffic_sink_sim.sv) \ +$(abspath crossbar_tb.sv) \ +$(abspath $(TB_TOP_MODULE).sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..0f1a10a6e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (//) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = axis_ctrl_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath axis_ctrl_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv new file mode 100644 index 000000000..fa112f5cb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv @@ -0,0 +1,26 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps + +module axis_ctrl_crossbar_nxn_tb(); + crossbar_tb #( + .TEST_NAME ("axis_ctrl_crossbar_nxn_tb"), + .ROUTER_IMPL ("axis_ctrl_2d_torus" ), // Router implementation + .ROUTER_PORTS (20 ), // Number of ports + .ROUTER_DWIDTH (64 ), // Router datapath width + .MTU_LOG2 (5 ), // log2 of max packet size for router + .NUM_MASTERS (4 ), // Number of data generators in test + .TEST_MAX_PACKETS (100 ), // How many packets to stream per test case? + .TEST_LPP (20 ), // Lines per packet + .TEST_MIN_INJ_RATE (10 ), // Minimum injection rate to test + .TEST_MAX_INJ_RATE (40 ), // Maximum injection rate to test + .TEST_INJ_RATE_INCR (10 ), // Injection rate increment + .TEST_GEN_LL_FILES (0 ) // Generate files to produce load-latency graphs? 
+ ) impl ( + /* no IO */ + ); +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..399515640 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (//) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = chdr_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath chdr_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv new file mode 100644 index 000000000..1c5cace63 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv @@ -0,0 +1,26 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps + +module chdr_crossbar_nxn_tb(); + crossbar_tb #( + .TEST_NAME ("chdr_crossbar_nxn_tb"), + .ROUTER_IMPL ("chdr_crossbar_nxn" ), // Router implementation + .ROUTER_PORTS (10 ), // Number of ports + .ROUTER_DWIDTH (64 ), // Router datapath width + .MTU_LOG2 (7 ), // log2 of max packet size for router + .NUM_MASTERS (10 ), // Number of data generators in test + .TEST_MAX_PACKETS (100 ), // How many packets to stream per test case? + .TEST_LPP (100 ), // Lines per packet + .TEST_MIN_INJ_RATE (60 ), // Minimum injection rate to test + .TEST_MAX_INJ_RATE (100 ), // Maximum injection rate to test + .TEST_INJ_RATE_INCR (10 ), // Injection rate increment + .TEST_GEN_LL_FILES (0 ) // Generate files to produce load-latency graphs? + ) impl ( + /* no IO */ + ); +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv new file mode 100644 index 000000000..a9fe3ba27 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv @@ -0,0 +1,150 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_traffic_sink_sim +// Description: +// A sink for CHDR traffic. Simulation only. 
+// Accepts packets and computes the following metrics: +// - Data integrity errors +// - Packet latency +// - Throughput counts +// All metrics can optionally be written to a file to +// generate load-latency graphs. + +`timescale 1ns/1ps + +`include "sim_cvita_lib.svh" + +module chdr_traffic_sink_sim #( + parameter WIDTH = 64, + parameter MTU = 5, + parameter [15:0] NODE_ID = 'd0, + parameter [15:0] NUM_NODES = 'd16, + parameter FILE_PATH = ".", + parameter FLUSH_N = 4 +) ( + // Clocks and resets + input clk, + input rst, + // Settings + input [63:0] current_time, + input start_stb, + input [7:0] injection_rate, + input [15:0] lines_per_pkt, + input [7:0] traffic_patt, + // CHDR master interface + input [WIDTH-1:0] s_axis_tdata, + input s_axis_tlast, + input s_axis_tvalid, + output s_axis_tready, + // Metrics + output session_active, + output [31:0] xfer_count, + output [31:0] pkt_count, + output [31:0] data_err_count, + output [31:0] route_err_count +); + + // Constants + localparam integer ERR_BIT_PKT_SIZE_MISMATCH = 1; + localparam integer ERR_BIT_PKT_DATA_MISMATCH = 2; + localparam integer ERR_BIT_PKT_DEST_MISMATCH = 4; + localparam integer ERR_BIT_PKT_SEQUENCE_ERR = 8; + + cvita_slave #(.DWIDTH(WIDTH)) s_chdr (.clk(clk)); + cvita_pkt_t pkt; + + assign s_chdr.axis.tdata = s_axis_tdata; + assign s_chdr.axis.tlast = s_axis_tlast; + assign s_chdr.axis.tvalid = s_axis_tvalid; + assign s_axis_tready = s_chdr.axis.tready; + + logic running = 0; + integer num_data_errs = 0; + integer num_route_errs = 0; + logic [31:0] num_pkts_xferd = 0; + logic [31:0] num_samps_xferd = 0; + + assign data_err_count = num_data_errs; + assign route_err_count = num_route_errs; + assign xfer_count = num_samps_xferd; + assign pkt_count = num_pkts_xferd; + assign session_active = running; + + integer session = 0; + string filename; + integer handle = 0; + integer err = 0; + integer bus_idle_cnt = 0; + logic [WIDTH-1:0] i; + + // Egress buff in source is MTU + 4 + localparam integer 
IDLE_TIMEOUT = (1 << (MTU + 4 + FLUSH_N)); + + initial begin: consume_blk + // Consume infinitely + s_chdr.reset(); + while (1) begin + // A session begins on the posedge of start_stb + while (~start_stb) @(posedge clk); + session = session + 1; + $sformat(filename, "%s/pkts_node%05d_inj%03d_lpp%05d_traffic%c_sess%04d.csv", + FILE_PATH, NODE_ID, injection_rate, lines_per_pkt, traffic_patt, session); + if (FILE_PATH != "") begin + handle = $fopen(filename, "w"); + if (handle == 0) begin + $error("Could not open file: %s", filename); + $finish(); + end + end + if (handle != 0) $fdisplay(handle, "Src,Dst,Seqno,Error,Latency"); + s_chdr.reset(); + num_data_errs = 0; + num_route_errs = 0; + num_pkts_xferd = 0; + num_samps_xferd = 0; + bus_idle_cnt = 0; + running = 1; + while (1) begin + // Pull packet from bus + err = 0; + if (~s_chdr.axis.tvalid[0]) begin + @(posedge clk); + bus_idle_cnt = bus_idle_cnt + 1; + if (bus_idle_cnt <= IDLE_TIMEOUT) + continue; + else + break; + end + s_chdr.pull_pkt(pkt, 0); + bus_idle_cnt = 0; + num_pkts_xferd = num_pkts_xferd + 1; + num_samps_xferd = num_samps_xferd + lines_per_pkt; + // Validate packet + if (pkt.hdr.dst_sid != NODE_ID) begin + err = err + ERR_BIT_PKT_DEST_MISMATCH; + num_route_errs = num_route_errs + 1; + end + if (pkt.payload.size() != lines_per_pkt-2) begin + err = err + ERR_BIT_PKT_SIZE_MISMATCH; + num_data_errs = num_data_errs + 1; + end else begin + for (i = 'd0; i < (lines_per_pkt-2); i=i+1) begin + if (pkt.payload[i] != i) begin + err = err + ERR_BIT_PKT_DATA_MISMATCH; + num_data_errs = num_data_errs + 1; + break; + end + end + end + if (handle != 0) $fdisplay(handle, "%00d,%00d,%00d,%00d,%00d", + pkt.hdr.src_sid, pkt.hdr.dst_sid, pkt.hdr.seqnum, err, (current_time - pkt.hdr.timestamp)); + end + running = 0; + if (handle != 0) $fclose(handle); + end + end + +endmodule \ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv 
b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv new file mode 100644 index 000000000..8c3d974c9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv @@ -0,0 +1,202 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_traffic_source_sim +// Description: +// A traffic generator for CHDR traffic. Simulation only. +// Supports multiple traffic pattern and injection rates. +// + +`timescale 1ns/1ps + +`include "sim_cvita_lib.svh" + +module chdr_traffic_source_sim #( + parameter WIDTH = 64, // Width of the AXI-Stream data bus + parameter MTU = 5, // log2 of the max number of lines in a packet + parameter [15:0] NODE_ID = 'd0, // Node ID for this generator + parameter [15:0] NUM_NODES = 'd16 // Total number of generators in the application +) ( + // Clocks and resets + input clk, // AXI-Stream clock + input rst, // AXI-Stream reset + // Settings + input [63:0] current_time, // The current value of the global timebase (synch to clk) + input start_stb, // A strobe that indicates the start of a generation session + input [7:0] injection_rate, // The inject rate (in percent) to simulate + input [15:0] lines_per_pkt, // Number of lines per packet to generate + input [7:0] traffic_patt, // The traffic pattern (see localparams below for values) + input [31:0] num_pkts_to_send, // Number of packets to send + // CHDR master interface + output [WIDTH-1:0] m_axis_tdata, // AXI-Stream master tdata + output m_axis_tlast, // AXI-Stream master tlast + output m_axis_tvalid, // AXI-Stream master tvalid + input m_axis_tready, // AXI-Stream master tready + // Metrics + output session_active, // Signal indicating if generation session is active + output [63:0] session_duration, // Session duration (only valid after session ends) + output [31:0] xfer_count, // Number of lines transferred (only valid after session ends) + output [31:0] pkt_count 
// Number of packets transferred (only valid after session ends) +); + // **** Supported Traffic Patters **** + localparam [7:0] TRAFFIC_PATT_LOOPBACK = 8'd76; //L + localparam [7:0] TRAFFIC_PATT_NEIGHBOR = 8'd78; //N + localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C + localparam [7:0] TRAFFIC_PATT_SEQUENTIAL = 8'd83; //S + localparam [7:0] TRAFFIC_PATT_UNIFORM = 8'd85; //U + localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O + localparam [7:0] TRAFFIC_PATT_RANDOM_PERM = 8'd82; //R + + cvita_master #(.DWIDTH(WIDTH)) m_chdr (.clk(clk)); + axis_t #(.DWIDTH(WIDTH)) post_fifo (.clk(clk)); + axis_t #(.DWIDTH(WIDTH)) pre_gate (.clk(clk)); + cvita_hdr_t header; + reg throttle = 1'b1; + + logic running = 0; + logic [31:0] curr_pkt_num = 'd0; + logic [31:0] num_samps_xferd = 'd0; + logic [63:0] start_time = 0; + logic [63:0] stop_time = 0; + logic [15:0] last_gen_sid = (NODE_ID - 16'd1); + + assign xfer_count = num_samps_xferd; + assign pkt_count = curr_pkt_num; + assign session_duration = (stop_time - start_time); + assign session_active = running; + + // Utility function to assign SIDs based on traffic pattern + function [15:0] gen_dst_sid; + input [7:0] traffic_patt; + input [15:0] last_sid; + + if (traffic_patt == TRAFFIC_PATT_UNIFORM) begin + gen_dst_sid = $urandom_range('d0, NUM_NODES-'d1); + end else if (traffic_patt == TRAFFIC_PATT_UNIFORM_OTHERS) begin + logic [31:0] rnum = $urandom_range('d0, NUM_NODES-'d2); + if (rnum < NODE_ID) + gen_dst_sid = rnum[15:0]; + else + gen_dst_sid = rnum[15:0] + 16'd1; + end else if (traffic_patt == TRAFFIC_PATT_SEQUENTIAL) begin + gen_dst_sid = (last_sid + 16'd1) % NUM_NODES; + end else if (traffic_patt == TRAFFIC_PATT_NEIGHBOR) begin + gen_dst_sid = (NODE_ID + 16'd1) % NUM_NODES; + end else if (traffic_patt == TRAFFIC_PATT_LOOPBACK) begin + gen_dst_sid = NODE_ID; + end else if (traffic_patt == TRAFFIC_PATT_BIT_COMPLEMENT) begin + gen_dst_sid = (NUM_NODES - NODE_ID - 1) % NUM_NODES; + end else if (traffic_patt == 
TRAFFIC_PATT_RANDOM_PERM) begin + //TODO: Implement me + gen_dst_sid = 0; + end else begin + gen_dst_sid = 'd0; + end + endfunction + + // Generation loop. Push to m_chdr infinitely fast + initial begin: gen_blk + // Generate infinitely + $srandom(NODE_ID + NUM_NODES); + m_chdr.reset(); + while (1) begin + // A generation session begins on the posedge of start_stb + while (~start_stb) @(posedge clk); + curr_pkt_num = 'd0; + m_chdr.reset(); + num_samps_xferd = 'd0; + start_time = current_time; + running = 1; + while (curr_pkt_num < num_pkts_to_send) begin + header = '{ + pkt_type:DATA, has_time:1, eob:0, + seqnum:curr_pkt_num[11:0], length:(lines_per_pkt*8), + src_sid:NODE_ID, dst_sid:gen_dst_sid(traffic_patt, last_gen_sid), + timestamp:0 //TS attached later + }; + last_gen_sid = header.dst_sid; + curr_pkt_num = curr_pkt_num + 'd1; + m_chdr.push_ramp_pkt(lines_per_pkt-2, 'h0, 'h1, header); + num_samps_xferd = num_samps_xferd + lines_per_pkt; + end + running = 0; + stop_time = current_time; + end + end + + // Capture packets in a really short FIFO (for backpressure) + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(MTU + 1) + ) fifo_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({m_chdr.axis.tlast, m_chdr.axis.tdata}), + .i_tvalid (m_chdr.axis.tvalid), + .i_tready (m_chdr.axis.tready), + .o_tdata ({post_fifo.tlast, post_fifo.tdata}), + .o_tvalid (post_fifo.tvalid), + .o_tready (post_fifo.tready), + .space (), + .occupied () + ); + + // Attach timestamp after the packet leaves the FIFO after + // throttling. + + localparam [1:0] ST_HDR = 2'd0; + localparam [1:0] ST_TS = 2'd1; + localparam [1:0] ST_BODY = 2'd2; + + reg [1:0] pkt_state = ST_HDR; + always_ff @(posedge clk) begin + if (rst) begin + pkt_state <= ST_HDR; + end else if (pre_gate.tvalid & pre_gate.tready) begin + case (pkt_state) + ST_HDR: + if (~pre_gate.tlast) + pkt_state <= pre_gate.tdata[61] ? ST_TS : ST_BODY; + ST_TS: + pkt_state <= pre_gate.tlast ? 
ST_HDR : ST_BODY; + ST_BODY: + pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY; + default: + pkt_state <= ST_HDR; + endcase + end + end + + // Enforce injection rate by pulling from FIFO with a certain time probability + always_ff @(posedge clk) begin + throttle <= ($urandom_range(32'd99, 32'd0) > {24'h0, injection_rate}); + end + + // Insert timestamp + throttle logic + assign pre_gate.tdata = (pkt_state == ST_TS) ? current_time : post_fifo.tdata; + assign pre_gate.tlast = post_fifo.tlast; + assign pre_gate.tvalid = post_fifo.tvalid & ~throttle; + assign post_fifo.tready = pre_gate.tready & ~throttle; + + // Gate the packet to smooth out throttle-related noise. + // This also serves as a buffer for the packet in case things are backed up + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(MTU + 4), .USE_AS_BUFF(1) + ) pkt_gate_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata (pre_gate.tdata), + .i_tlast (pre_gate.tlast), + .i_terror (1'b0), + .i_tvalid (pre_gate.tvalid), + .i_tready (pre_gate.tready), + .o_tdata (m_axis_tdata), + .o_tlast (m_axis_tlast), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready) + ); + +endmodule \ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv new file mode 100644 index 000000000..fc9d53fe7 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv @@ -0,0 +1,428 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps +`define NS_PER_TICK 1 +`define NUM_TEST_CASES 7 + +`include "sim_clks_rsts.vh" +`include "sim_exec_report.vh" +`include "sim_set_rb_lib.svh" +`include "sim_axis_lib.svh" + +`define SIM_TIMEOUT_US 1000000 // Default: 1s + +module crossbar_tb #( + parameter TEST_NAME = "crossbar_tb", + // Router parameters + parameter ROUTER_IMPL = "axi_crossbar", // Router implementation + parameter ROUTER_PORTS = 
10, // # Router ports + parameter ROUTER_DWIDTH = 64, // Router datapath width + parameter MTU_LOG2 = 7, // log2 of max packet size for router + parameter NUM_MASTERS = ROUTER_PORTS, // Number of data generators in test + // Test parameters + parameter TEST_MAX_PACKETS = 50, // How many packets to stream per test case? + parameter TEST_LPP = 50, // Lines per packet + parameter TEST_MIN_INJ_RATE = 60, // Minimum injection rate to test + parameter TEST_MAX_INJ_RATE = 100, // Maximum injection rate to test + parameter TEST_INJ_RATE_INCR = 10, // Injection rate increment + parameter TEST_GEN_LL_FILES = 0 // Generate files to produce load-latency graphs? + +)( + /* no IO */ +); + `TEST_BENCH_INIT(TEST_NAME,`NUM_TEST_CASES,`NS_PER_TICK) + + //---------------------------------------------------- + // General test setup + //---------------------------------------------------- + + // Clocks and reset + `DEFINE_CLK(clk, 5.000, 50) + `DEFINE_RESET(rst, 0, 10) + + // Timekeeper (cycle counter) + logic [63:0] timestamp; + initial begin : timekeeper_blk + while (rst) @(posedge clk); + timestamp = 'd0; + while (~rst) begin + @(posedge clk); + timestamp = timestamp + 'd1; + end + end + + //---------------------------------------------------- + // Instantiate traffic generators, checkers, buses + //---------------------------------------------------- + localparam FILE_PATH = {`WORKING_DIR, "/data/", ROUTER_IMPL}; + + // Data buses + axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) src2rtr_axis (.clk(clk)); + axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) rtr2snk_axis (.clk(clk)); + + // Control buses + settings_bus_master #(.SR_AWIDTH(16), .SR_DWIDTH(32)) rtr_sb (.clk(clk)); + wire rtr_sb_ack; + + // Test vector source and sink instantiation + logic [7:0] set_injection_rate; + logic [15:0] set_lines_per_pkt; + logic [7:0] set_traffic_patt; + logic [31:0] set_num_pkts_to_send; + logic snk_start_stb = 0; + logic src_start_stb = 0; + + wire [63:0] 
session_duration [0:ROUTER_PORTS-1]; + wire [ROUTER_PORTS-1:0] src_active; + wire [31:0] src_xfer_count [0:ROUTER_PORTS-1]; + wire [31:0] src_pkt_count [0:ROUTER_PORTS-1]; + wire [ROUTER_PORTS-1:0] snk_active; + wire [31:0] snk_xfer_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_pkt_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_data_err_count [0:ROUTER_PORTS-1]; + wire [31:0] snk_route_err_count[0:ROUTER_PORTS-1]; + + wire deadlock_detected; + reg deadlock_detected_del = 1'b0; + always @(posedge clk) deadlock_detected_del <= deadlock_detected; + wire deadlock_re = (deadlock_detected & ~deadlock_detected_del); + wire deadlock_fe = (~deadlock_detected & deadlock_detected_del); + + genvar i; + generate for (i = 0; i < ROUTER_PORTS; i=i+1) begin: src_snk_blk + chdr_traffic_source_sim #( + .WIDTH (ROUTER_DWIDTH), + .MTU (MTU_LOG2), + .NODE_ID (i), + .NUM_NODES (ROUTER_PORTS) + ) traffic_src ( + .clk (clk), + .rst (rst), + .current_time (timestamp), + .start_stb (src_start_stb & (i < NUM_MASTERS)), + .injection_rate (set_injection_rate), + .lines_per_pkt (set_lines_per_pkt), + .traffic_patt (set_traffic_patt), + .num_pkts_to_send (set_num_pkts_to_send), + .m_axis_tdata (src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]), + .m_axis_tlast (src2rtr_axis.tlast[i]), + .m_axis_tvalid (src2rtr_axis.tvalid[i]), + .m_axis_tready (src2rtr_axis.tready[i]), + .session_active (src_active[i]), + .session_duration (session_duration[i]), + .xfer_count (src_xfer_count[i]), + .pkt_count (src_pkt_count[i]) + ); + + chdr_traffic_sink_sim #( + .WIDTH (ROUTER_DWIDTH), + .MTU (MTU_LOG2), + .NODE_ID (i), + .NUM_NODES (ROUTER_PORTS), + .FILE_PATH (TEST_GEN_LL_FILES==1 ? 
FILE_PATH : "") + ) traffic_sink ( + .clk (clk), + .rst (rst), + .current_time (timestamp), + .start_stb (snk_start_stb), + .injection_rate (set_injection_rate), + .lines_per_pkt (set_lines_per_pkt), + .traffic_patt (set_traffic_patt), + .s_axis_tdata (rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]), + .s_axis_tlast (rtr2snk_axis.tlast[i]), + .s_axis_tvalid (rtr2snk_axis.tvalid[i]), + .s_axis_tready (rtr2snk_axis.tready[i]), + .session_active (snk_active[i]), + .xfer_count (snk_xfer_count[i]), + .pkt_count (snk_pkt_count[i]), + .data_err_count (snk_data_err_count[i]), + .route_err_count (snk_route_err_count[i]) + ); + end endgenerate + + //---------------------------------------------------- + // Instantiate DUT + //---------------------------------------------------- + generate if (ROUTER_IMPL == "FIFO") begin + for (i = 0; i < ROUTER_PORTS; i=i+1) begin + axi_fifo #( + .WIDTH(ROUTER_DWIDTH+1), .SIZE(0) + ) fifo_i ( + .clk (clk), + .reset (rst), + .clear (1'b0), + .i_tdata ({src2rtr_axis.tlast[i], src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}), + .i_tvalid (src2rtr_axis.tvalid[i]), + .i_tready (src2rtr_axis.tready[i]), + .o_tdata ({rtr2snk_axis.tlast[i], rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}), + .o_tvalid (rtr2snk_axis.tvalid[i]), + .o_tready (rtr2snk_axis.tready[i]), + .space (), + .occupied () + ); + end + end else if (ROUTER_IMPL == "axi_crossbar") begin + axi_crossbar #( + .BASE (0), + .FIFO_WIDTH (ROUTER_DWIDTH), + .DST_WIDTH (16), + .NUM_INPUTS (ROUTER_PORTS), + .NUM_OUTPUTS (ROUTER_PORTS) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + .clear (1'b0), + .local_addr (8'd0), + // Inputs + .i_tdata (src2rtr_axis.tdata), + .i_tlast (src2rtr_axis.tlast), + .i_tvalid (src2rtr_axis.tvalid), + .i_tready (src2rtr_axis.tready), + .pkt_present (src2rtr_axis.tvalid), + // Output + .o_tdata (rtr2snk_axis.tdata), + .o_tlast (rtr2snk_axis.tlast), + .o_tvalid (rtr2snk_axis.tvalid), + .o_tready 
(rtr2snk_axis.tready), + // Setting Bus + .set_stb (rtr_sb.settings_bus.set_stb), + .set_addr (rtr_sb.settings_bus.set_addr), + .set_data (rtr_sb.settings_bus.set_data), + // Readback bus + .rb_rd_stb (1'b0), + .rb_addr ({(2*$clog2(ROUTER_PORTS)){1'b0}}), + .rb_data () + ); + end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin + chdr_crossbar_nxn #( + .CHDR_W (ROUTER_DWIDTH), + .NPORTS (ROUTER_PORTS), + .DEFAULT_PORT (0), + .MTU (MTU_LOG2), + .ROUTE_TBL_SIZE (6), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE ("AREA"), + .NPORTS_MGMT (0), + .EXT_RTCFG_PORT (1) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (src2rtr_axis.tdata), + .s_axis_tlast (src2rtr_axis.tlast), + .s_axis_tvalid (src2rtr_axis.tvalid), + .s_axis_tready (src2rtr_axis.tready), + // Output + .m_axis_tdata (rtr2snk_axis.tdata), + .m_axis_tlast (rtr2snk_axis.tlast), + .m_axis_tvalid (rtr2snk_axis.tvalid), + .m_axis_tready (rtr2snk_axis.tready), + // External router config + .ext_rtcfg_stb (rtr_sb.settings_bus.set_stb), + .ext_rtcfg_addr (rtr_sb.settings_bus.set_addr), + .ext_rtcfg_data (rtr_sb.settings_bus.set_data), + .ext_rtcfg_ack (rtr_sb_ack) + ); + end else begin + axis_ctrl_crossbar_nxn #( + .WIDTH (ROUTER_DWIDTH), + .NPORTS (ROUTER_PORTS), + .TOPOLOGY (ROUTER_IMPL == "axis_ctrl_2d_torus" ? 
"TORUS" : "MESH"), + .INGRESS_BUFF_SIZE(MTU_LOG2), + .ROUTER_BUFF_SIZE (MTU_LOG2), + .ROUTING_ALLOC ("WORMHOLE"), + .SWITCH_ALLOC ("PRIO") + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (src2rtr_axis.tdata), + .s_axis_tlast (src2rtr_axis.tlast), + .s_axis_tvalid (src2rtr_axis.tvalid), + .s_axis_tready (src2rtr_axis.tready), + // Output + .m_axis_tdata (rtr2snk_axis.tdata), + .m_axis_tlast (rtr2snk_axis.tlast), + .m_axis_tvalid (rtr2snk_axis.tvalid), + .m_axis_tready (rtr2snk_axis.tready), + // Deadlock detection + .deadlock_detected(deadlock_detected) + ); + end endgenerate + + //---------------------------------------------------- + // Test routine. Runs tests and writes metrics to file + //---------------------------------------------------- + + // Constants + localparam [7:0] TRAFFIC_PATT_LOOPBACK = 8'd76; //L + localparam [7:0] TRAFFIC_PATT_NEIGHBOR = 8'd78; //N + localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C + localparam [7:0] TRAFFIC_PATT_SEQUENTIAL = 8'd83; //S + localparam [7:0] TRAFFIC_PATT_UNIFORM = 8'd85; //U + localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O + localparam [7:0] TRAFFIC_PATT_RANDOM_PERM = 8'd82; //R + + string filename; + integer node; + integer session = 0; + integer handle = 0; + logic [63:0] start_time; + integer total_pkts_recvd = 0, total_pkts_sent = 0; + + task sim_dataflow; + input [7:0] injection_rate; + input [7:0] traffic_patt; + input [15:0] lines_per_pkt; + input [31:0] num_pkts_to_send; + begin + session = session + 1; + $display("--------------- New Simulation ---------------"); + $display("- Module = %s", ROUTER_IMPL); + $display("- Nodes = %00d", ROUTER_PORTS); + $display("- Injection Rate = %00d%%", injection_rate); + $display("- Traffic Pattern = %c", traffic_patt); + $display("- Packet Size = %00d words (%00d bits)", lines_per_pkt, ROUTER_DWIDTH); + $display("- Max Packets = %00d", num_pkts_to_send); + // Configure settings + @(posedge clk); + 
set_injection_rate = injection_rate; + set_lines_per_pkt = lines_per_pkt; + set_traffic_patt = traffic_patt; + set_num_pkts_to_send = num_pkts_to_send; + @(posedge clk); + // Start the sink then the source + $display("Data flow starting..."); + snk_start_stb = 1; + src_start_stb = 1; + @(posedge clk); + src_start_stb = 0; + snk_start_stb = 0; + @(posedge clk); + start_time = timestamp; + // Wait for source blocks to finish generating + $display("Waiting for packets to transmit... (may take a while)"); + while (|src_active) begin + @(posedge clk); + if (deadlock_re) $display("WARNING: Deadlock detected"); + if (deadlock_fe) $display("Recovered from deadlock"); + end + // Wait for sink blocks to finish consuming + $display("All packets transmitted. Waiting to flush..."); + while (|snk_active) @(posedge clk); + // If router deadlocks then wait for it to recover + if (deadlock_detected) begin + $display("Waiting for deadlock recovery to finish..."); + while (deadlock_detected) @(posedge clk); + end + repeat(set_lines_per_pkt) @(posedge clk); + // Record summary to file and print to console + $sformat(filename, "%s/info_inj%03d_lpp%05d_traffic%c_sess%04d.csv", + FILE_PATH, injection_rate, lines_per_pkt, traffic_patt, session); + if (TEST_GEN_LL_FILES == 1) begin + handle = $fopen(filename, "w"); + if (handle == 0) begin + $error("Could not open file: %s", filename); + $finish(); + end + end + if (handle != 0) $fdisplay(handle, "Impl,Node,TxPkts,RxPkts,Duration,ErrRoute,ErrData"); + total_pkts_sent = 0; + total_pkts_recvd = 0; + for (node = 0; node < ROUTER_PORTS; node=node+1) begin + $display("- Node #%03d: TX = %5d pkts, RX = %5d pkts, Inj Rate = %3d%%. 
  //----------------------------------------------------
  // Main test loop
  //----------------------------------------------------

  // Local copies of the TEST_* testbench parameters. They are the values
  // handed to sim_dataflow() for every run of the sweep below.
  logic [31:0] MAX_PACKETS = TEST_MAX_PACKETS;
  logic [15:0] LPP = TEST_LPP;
  integer MIN_INJ_RATE = TEST_MIN_INJ_RATE;
  integer MAX_INJ_RATE = TEST_MAX_INJ_RATE;
  integer INJ_RATE_INCR = TEST_INJ_RATE_INCR;

  // Loop variable for the injection-rate sweep
  integer inj_rate = 0;

  // tb_main: top-level stimulus. Configures the router once, then runs
  // sim_dataflow() for each traffic pattern at every injection rate from
  // MIN_INJ_RATE to MAX_INJ_RATE (inclusive) in steps of INJ_RATE_INCR.
  initial begin : tb_main
    // Keep the source/sink start strobes low until a run is requested
    src_start_stb = 0;
    snk_start_stb = 0;
    rtr_sb.reset();
    // Hold off until reset is released, then allow ten more clock cycles
    while (rst) @(posedge clk);

    repeat (10) @(posedge clk);

    `TEST_CASE_START("Set up crossbar");
    // One rtr_sb write per port; the data word is the node index zero-extended
    // to 32 bits. Only "axi_crossbar" (address 256 + node) and
    // "chdr_crossbar_nxn" (address node) are programmed here; any other
    // ROUTER_IMPL gets no writes.
    // NOTE(review): presumably these entries map node N to port N in the
    // router's routing table -- confirm against the router register map.
    for (node = 0; node < ROUTER_PORTS; node=node+1) begin
      if (ROUTER_IMPL == "axi_crossbar") begin
        rtr_sb.write(16'd256 + node[15:0], {16'h0, node[15:0]});
      end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin
        rtr_sb.write(node[15:0], {16'h0, node[15:0]});
        // Only this implementation waits for rtr_sb_ack after each write
        while (~rtr_sb_ack) @(posedge clk);
      end
    end
    `TEST_CASE_DONE(1)

    // Each test case below is the same injection-rate sweep with a different
    // traffic pattern. sim_dataflow() arguments are, in order: injection
    // rate, traffic pattern, lines per packet, number of packets to send.
    `TEST_CASE_START("Simulate LOOPBACK Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_LOOPBACK, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate SEQUENTIAL Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_SEQUENTIAL, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate UNIFORM Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate UNIFORM_OTHERS Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM_OTHERS, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate BIT_COMPLEMENT Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_BIT_COMPLEMENT, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate NEIGHBOR Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_NEIGHBOR, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_BENCH_DONE
  end // initial begin
def get_options():
    """Parse the command line.

    Returns:
        argparse.Namespace with one attribute, ``datadir``: the directory
        holding the ``info*.csv`` / ``pkts*.csv`` files written by the
        testbench. Defaults to the current directory when omitted.
    """
    parser = argparse.ArgumentParser(description='Generate Load Latency Graphs')
    # nargs='?' is what makes the default reachable: a positional argument
    # without it is always required, so default='.' was never used.
    parser.add_argument('datadir', type=str, nargs='?', default='.', help='Location of packet capture files generated by testbench')
    return parser.parse_args()

# Single-character traffic pattern codes used in the CSV file names
TRAFFIC_PATTERNS = {'U':'UNIFORM', 'O':'UNIFORM_OTHERS', 'N':'NEIGHBOR', 'L':'LOOPBACK', 'S':'SEQUENTIAL', 'C':'BIT_COMPLEMENT', 'R':'RANDOM_PERM'}

# Expected header rows of the two CSV flavours
_INFO_HEADER = ['Impl', 'Node', 'TxPkts', 'RxPkts', 'Duration', 'ErrRoute', 'ErrData']
_PKTS_HEADER = ['Src', 'Dst', 'Seqno', 'Error', 'Latency']

def _read_data_rows(filename, expected_header):
    """Return the non-empty data rows of a CSV file after checking its header.

    Raises:
        ValueError: if the first row is missing or differs from expected_header.
    """
    with open(filename, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        if next(reader, None) != expected_header:
            raise ValueError('Incorrect header: %s'%(filename))
        return [row for row in reader if row]

class InfoFile():
    """Per-session summary parsed from an ``info_*.csv`` file.

    Attributes (all derived from the file name or summed over the per-node rows):
        inj_rate, lpp, traffic_patt, session: test settings from the file name
        impl: router implementation name from the last data row ('' if no rows)
        tx_pkts, rx_pkts, duration, errs: sums over all node rows
        nodes: number of node rows
        real_inj_rate: 100 * tx_pkts * lpp / duration (0.0 if duration is 0)
    """
    def __init__(self, filename):
        """Parse filename.

        Raises:
            ValueError: if the file name or the CSV header is malformed.
            KeyError: if the traffic pattern code is not in TRAFFIC_PATTERNS.
        """
        # Extract test info from filename. Matching on the base name keeps the
        # parser independent of the path separator and of relative paths.
        m = re.fullmatch(r"info_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", os.path.basename(filename))
        if m is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        self.inj_rate = int(m.group(1))
        self.lpp = int(m.group(2))
        self.traffic_patt = TRAFFIC_PATTERNS[m.group(3)]
        self.session = int(m.group(4))

        self.impl = ''
        self.tx_pkts = 0
        self.rx_pkts = 0
        self.duration = 0
        self.errs = 0
        self.nodes = 0
        for row in _read_data_rows(filename, _INFO_HEADER):
            self.impl = row[0]
            self.tx_pkts += int(row[2])
            # Accumulate onto rx_pkts itself; the previous code added the RX
            # count to the running TX total, so rx_pkts was meaningless.
            self.rx_pkts += int(row[3])
            self.duration += int(row[4])
            self.errs += int(row[5]) + int(row[6])
            self.nodes += 1
        # A file with no rows (or zero recorded duration) has no measurable rate
        if self.duration:
            self.real_inj_rate = (100.0 * self.tx_pkts * self.lpp) / self.duration
        else:
            self.real_inj_rate = 0.0

class PktFile():
    """Per-node packet log parsed from a ``pkts_*.csv`` file.

    Attributes:
        node, inj_rate, lpp, traffic_patt, session: settings from the file name
        latencies: list of the integer 'Latency' column, one entry per packet
    """
    def __init__(self, filename):
        """Parse filename.

        Raises:
            ValueError: if the file name or the CSV header is malformed.
            KeyError: if the traffic pattern code is not in TRAFFIC_PATTERNS.
        """
        # Extract test info from filename
        m = re.fullmatch(r"pkts_node([0-9]+)_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", os.path.basename(filename))
        if m is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        self.node = int(m.group(1))
        self.inj_rate = int(m.group(2))
        self.lpp = int(m.group(3))
        self.traffic_patt = TRAFFIC_PATTERNS[m.group(4)]
        self.session = int(m.group(5))

        self.latencies = [int(row[4]) for row in _read_data_rows(filename, _PKTS_HEADER)]


########################################################################
# main
########################################################################
def main():
    """Read all testbench CSV files in the data directory and write the plots.

    Writes one load-latency PNG per (lines-per-packet, traffic pattern)
    configuration and one offered-vs-accepted injection rate PNG, all into
    the data directory. Exits with status 1 if the directory does not exist.
    """
    options = get_options()

    if (not os.path.isdir(options.datadir)):
        print('ERROR: Data directory %s does not exist'%(options.datadir))
        sys.exit(1)

    info_db = dict()
    router_impl = ''
    lines_per_pkt = 0
    for ifile in glob.glob(os.path.join(options.datadir, 'info*.csv')):
        print('INFO: Reading %s...'%(ifile))
        tmp = InfoFile(ifile)
        router_impl = tmp.impl      # Assume that all files have the same impl
        lines_per_pkt = tmp.lpp     # Assume that all files have the same LPP
        info_db[(tmp.lpp, tmp.traffic_patt, tmp.inj_rate)] = tmp

    # pkt_db[(lpp, traffic_patt)][inj_rate] -> latencies of all nodes combined
    pkt_db = dict()
    for pfile in glob.glob(os.path.join(options.datadir, 'pkts*.csv')):
        print('INFO: Reading %s...'%(pfile))
        tmp = PktFile(pfile)
        per_rate = pkt_db.setdefault((tmp.lpp, tmp.traffic_patt), dict())
        per_rate.setdefault(tmp.inj_rate, []).extend(tmp.latencies)

    # Write load-latency plots to file
    percentile = [0, 25, 50, 75, 90, 95, 99, 99.9, 100]
    actual_inj_rate_db = dict()
    for config in sorted(pkt_db):
        (lpp, traffic_patt) = config
        # Offered rates that can actually be plotted: they need a matching
        # info file (for the measured rate) and at least one packet.
        offered = []
        for inj_rate in sorted(pkt_db[config]):
            if (lpp, traffic_patt, inj_rate) not in info_db or not pkt_db[config][inj_rate]:
                print('WARNING: Skipping inj_rate %d for %s (missing info file or no packets)'%(inj_rate, str(config)))
                continue
            offered.append(inj_rate)
        real_rate = dict((r, info_db[(lpp, traffic_patt, r)].real_inj_rate) for r in offered)

        ll_file = 'load-latency_%s_traffic-%s_lpp-%d.png'%(router_impl, traffic_patt, lpp)
        print('INFO: Writing file ' + ll_file + '...')
        plt.figure()
        plt.title('Load Latency Graph for %s\n(Traffic: %s, LPP: %d)'%(router_impl, traffic_patt, lpp))
        for p in percentile:
            # Keyed by measured rate so the x axis shows the accepted load
            plot_data = dict((real_rate[r], np.percentile(pkt_db[config][r], p)) for r in offered)
            rates = sorted(plot_data)
            plt.plot(rates, [plot_data[r] for r in rates], label='$P_{%.1f}$'%(p))
        plt.xlabel('Load (%)')
        plt.xticks(range(0, 110, 10))
        plt.ylabel('Latency (cycles)')
        plt.grid(True)
        plt.legend()
        plt.savefig(os.path.join(options.datadir, ll_file), dpi=120)
        # Release the figure; pyplot otherwise keeps every figure alive
        plt.close()
        # Generate actual inj_rate graph
        actual_inj_rate_db[config] = (offered, [real_rate[r] for r in offered])

    # Write offered vs actual injection rate plots to file
    injrate_file = 'injection-rate_%s_lpp-%d.png'%(router_impl, lines_per_pkt)
    print('INFO: Writing file ' + injrate_file + '...')
    plt.figure()
    plt.title('Max Injection Rate Graph for %s'%(router_impl))
    for config in actual_inj_rate_db:
        (x, y) = actual_inj_rate_db[config]
        plt.plot(x, y, label=str(config))
    plt.xlabel('Offered Injection Rate (%)')
    plt.xticks(range(0, 110, 10))
    plt.ylabel('Accepted Injection Rate (%)')
    plt.yticks(range(0, 110, 10))
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(options.datadir, injrate_file), dpi=120)
    plt.close()

if __name__=='__main__':
    main()
--- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# +# Description +# Run the crossbar testbench (crossbar_tb) for varios parameter +# configurations and generates load-latency graphs for each run. + +import argparse +import math +import os, sys +import shutil +import glob +import subprocess + +g_tb_top_template = """ +`timescale 1ns/1ps +module crossbar_tb_auto(); + crossbar_tb #( + .TEST_NAME ("crossbar_tb_auto"), + .ROUTER_IMPL ("{rtr_impl}"), + .ROUTER_PORTS ({rtr_ports}), + .ROUTER_DWIDTH ({rtr_width}), + .MTU_LOG2 ({rtr_mtu}), + .NUM_MASTERS ({rtr_sources}), + .TEST_MAX_PACKETS ({tst_maxpkts}), + .TEST_LPP ({tst_lpp}), + .TEST_MIN_INJ_RATE ({tst_injrate_min}), + .TEST_MAX_INJ_RATE ({tst_injrate_max}), + .TEST_INJ_RATE_INCR (10), + .TEST_GEN_LL_FILES (1) + ) impl ( + /* no IO */ + ); +endmodule +""" + +g_test_params = { + 'data': {'rtr_width':64, 'rtr_mtu':7, 'tst_maxpkts':100, 'tst_lpp':100, 'tst_injrate_min':30, 'tst_injrate_max':100}, + 'ctrl': {'rtr_width':64, 'rtr_mtu':5, 'tst_maxpkts':100, 'tst_lpp':20, 'tst_injrate_min':10, 'tst_injrate_max':50}, +} + +g_xb_types = { + 'chdr_crossbar_nxn':'data', 'axi_crossbar':'data', + 'axis_ctrl_2d_torus':'ctrl', 'axis_ctrl_2d_mesh':'ctrl' +} + +def get_options(): + parser = argparse.ArgumentParser(description='Run correctness sim and generate load-latency plots') + parser.add_argument('--impl', type=str, default='chdr_crossbar_nxn', help='Implementation (CSV) [%s]'%(','.join(g_xb_types.keys()))) + parser.add_argument('--ports', type=str, default='16', help='Number of ports (CSV)') + parser.add_argument('--sources', type=str, default='16', help='Number of active data sources (masters)') + return parser.parse_args() + +def launch_run(impl, ports, sources): + run_name = '%s_ports%d_srcs%d'%(impl, ports, sources) + # Prepare a transform 
map to autogenerate a TB file + transform = {'rtr_impl':impl, 'rtr_ports':ports, 'rtr_sources':sources} + for k,v in g_test_params[g_xb_types[impl]].items(): + transform[k] = v + # Create crossbar_tb_auto.sv with specified parameters + with open('crossbar_tb_auto.sv', 'w') as out_file: + out_file.write(g_tb_top_template.format(**transform)) + # Create data directory for the simulation + data_dir = os.path.join('data', impl) + export_dir = os.path.join('data', run_name) + try: + os.makedirs('data') + except FileExistsError: + pass + os.makedirs(data_dir) + os.makedirs(export_dir) + # Run "make xsim" + exitcode = subprocess.Popen('make xsim TB_TOP_MODULE=crossbar_tb_auto', shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "make xsim". Was setupenv.sh run?') + # Generate load-latency graphs + exitcode = subprocess.Popen('gen_load_latency_graph.py ' + data_dir, shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "gen_load_latency_graph.py"') + # Copy files + os.rename('xsim.log', os.path.join(export_dir, 'xsim.log')) + for file in glob.glob(os.path.join(data_dir, '*.png')): + shutil.copy(file, export_dir) + # Cleanup outputs + subprocess.Popen('make cleanall', shell=True).wait() + try: + os.remove('crossbar_tb_auto.sv') + except FileNotFoundError: + pass + try: + shutil.rmtree(data_dir) + except OSError: + print('WARNING: Could not delete ' + data_dir) + +def main(): + args = get_options(); + for impl in args.impl.strip().split(','): + for ports in args.ports.strip().split(','): + for sources in args.sources.strip().split(','): + launch_run(impl, int(ports), min(int(ports), int(sources))) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py new file mode 100755 index 000000000..a2eaf71fb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py @@ -0,0 +1,125 @@ +#!/usr/bin/python3 +# 
# Parse command line options
# ------------------------------------------------
def get_options():
    """Parse the command line and return the argparse namespace
    (attributes: pattern, dimsize, seed)."""
    parser = argparse.ArgumentParser(description='Generate a node to coordinate mapping file.')
    parser.add_argument('--pattern', type=str, default='xy', choices=['xy', 'yx', 'spiral', 'random'], help='Node distribution pattern')
    parser.add_argument('--dimsize', type=int, default=4, help='Maximum dimension size')
    parser.add_argument('--seed', type=int, default=None, help='Seed for random permutation generator')
    return parser.parse_args()

# Pattern Generators
# ------------------------------------------------
# Each generator returns a dict {node number: (x, y)} for an N x N grid.
def gen_xy(N):
    """Number the nodes row by row (x varies fastest)."""
    return {y*N + x: (x, y) for y in range(N) for x in range(N)}

def gen_yx(N):
    """Number the nodes column by column (y varies fastest)."""
    return {x*N + y: (x, y) for y in range(N) for x in range(N)}

def gen_spiral(N):
    """Number the nodes along a spiral that starts near the grid centre."""
    nodes = dict()
    # (x, y) is the position relative to the spiral origin; `offset` shifts
    # it into the 0..N-1 coordinate range.
    offset = int(math.ceil(N/2)) - 1
    x = y = 0
    dx = 0
    dy = -1
    for i in range(N**2):
        if (-N/2 < x <= N/2) and (-N/2 < y <= N/2):
            nodes[i] = (x + offset, y + offset)
        # Turn at the corners of the spiral
        if x == y or (x < 0 and x == -y) or (x > 0 and x == 1-y):
            dx, dy = -dy, dx
        x, y = x+dx, y+dy
    return nodes

def gen_random(N):
    """Assign node numbers by a random permutation (uses the `random` module state)."""
    rnodes = random.sample(range(N*N), N*N)
    return {rnodes[x*N + y]: (x, y) for y in range(N) for x in range(N)}

# Source Generators
# ------------------------------------------------
def layout_nodes(nodes):
    """Return one text line per grid row showing the node number at each (x, y)."""
    N = int(round(math.sqrt(len(nodes))))
    coords = {v: k for k, v in nodes.items()}
    return [''.join('%5d'%(coords[(x,y)]) for x in range(N)) for y in range(N)]

def gen_vparams(nodes, N, pattern):
    """Return the Verilog source lines defining XCOORD_DIM_<N> / YCOORD_DIM_<N>.

    Each localparam packs one coordinate per node, node 0 in the least
    significant bits, using ceil(log2(N)) bits per coordinate.
    """
    # Accept an integral float for callers that derive N from a square root
    N = int(N)
    src_lines = [ '\n// DIM_SIZE = %d, PATTERN = %s'%(N,pattern.upper()), '//------------------------------------' ]
    for l in layout_nodes(nodes):
        src_lines.append('// ' + l)
    bitw = math.ceil(math.log2(N))
    # Highest node number first so that node 0 ends up in the LSBs
    ordered = [v for k, v in sorted(nodes.items(), reverse=True)]
    xvals = ','.join(['%d\'d%d'%(bitw,v[0]) for v in ordered])
    yvals = ','.join(['%d\'d%d'%(bitw,v[1]) for v in ordered])
    xpar = 'localparam [%d:0] XCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, xvals)
    ypar = 'localparam [%d:0] YCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, yvals)
    src_lines.append(xpar)
    src_lines.append(ypar)
    src_lines.append('')
    return src_lines

def gen_lookup_func(dim, N):
    """Return the Verilog source lines of function node_to_<dim>dst.

    The function selects the coordinate table matching DIM_SIZE (2..N) and
    indexes it with the low bits of the packet header; any other DIM_SIZE
    yields zero.
    """
    src_lines = [ 'function [CLOG2_DIM_SIZE-1:0] node_to_%sdst;'%(dim), ' input [WIDTH-1:0] header;', 'begin']
    dim_sizes = range(2, N+1)
    for i in dim_sizes:
        node_bitw = math.ceil(math.log2(i*i))
        dim_bitw = math.ceil(math.log2(i))
        prefix = ' ' if (i == dim_sizes[0]) else ' else '
        src_lines.append(prefix + 'if (DIM_SIZE == %d)'%(i))
        src_lines.append(' node_to_%sdst = %sCOORD_DIM_%03d[%d*header[%d:0] +: %d];'%(dim,dim.upper(),i,dim_bitw,node_bitw-1,dim_bitw))
    # Without any `if` branch a bare `else` would be a syntax error
    if len(dim_sizes) > 0:
        src_lines.append(' else')
    src_lines.append(' node_to_%sdst = {CLOG2_DIM_SIZE{1\'d0}};'%(dim))
    src_lines.append('end endfunction\n\n')
    return src_lines

def gen_vheader(dimsize, mapgen, pattern, filename):
    """Write the Verilog header with tables and lookup functions for sizes 2..dimsize.

    Args:
        dimsize: largest supported dimension size (must be >= 2)
        mapgen: pattern generator, called as mapgen(N) -> {node: (x, y)}
        pattern: pattern name, only used in the generated comments
        filename: output file path

    Raises:
        ValueError: if dimsize is smaller than 2.
    """
    if dimsize < 2:
        raise ValueError('dimsize must be at least 2 (got %d)'%(dimsize))
    with open(filename, 'w') as vhfile:
        vhfile.write('// Copyright %s Ettus Research, A National Instruments Company\n'%(datetime.datetime.now().year))
        vhfile.write('// SPDX-License-Identifier: LGPL-3.0-or-later\n')
        vhfile.write('//\n')
        vhfile.write('// Autogenerated file. Do not modify.\n')
        vhfile.write('// $ %s\n'%(' '.join(sys.argv[:])))
        vhfile.write('\nparameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround\n\n')
        for i in range(2, dimsize+1):
            # i is the dimension size; no need to recover it (as a float)
            # from the square root of the node count.
            vhfile.write('\n'.join(gen_vparams(mapgen(i), i, pattern)))
        vhfile.write('\n\n')
        vhfile.write('\n'.join(gen_lookup_func('x', dimsize)))
        vhfile.write('\n'.join(gen_lookup_func('y', dimsize)))

def main():
    """Generate mesh_node_mapping.vh in the current directory."""
    args = get_options()
    random.seed(args.seed)
    generators = {'xy': gen_xy, 'yx': gen_yx, 'spiral':gen_spiral, 'random': gen_random}
    gen_vheader(args.dimsize, generators[args.pattern], args.pattern, 'mesh_node_mapping.vh')

if __name__ == '__main__':
    main()
+ output wire s_axis_ter_tready, + output wire [WIDTH-1:0] m_axis_ter_tdata, + output wire m_axis_ter_tlast, + output wire m_axis_ter_tvalid, + input wire m_axis_ter_tready, + + // West inter-router connections + input wire [WIDTH-1:0] s_axis_wst_tdata, + input wire [0:0] s_axis_wst_tdest, + input wire s_axis_wst_tlast, + input wire s_axis_wst_tvalid, + output wire s_axis_wst_tready, + output wire [WIDTH-1:0] m_axis_wst_tdata, + output wire [0:0] m_axis_wst_tdest, + output wire m_axis_wst_tlast, + output wire m_axis_wst_tvalid, + input wire m_axis_wst_tready, + + // East inter-router connections + input wire [WIDTH-1:0] s_axis_est_tdata, + input wire [0:0] s_axis_est_tdest, + input wire s_axis_est_tlast, + input wire s_axis_est_tvalid, + output wire s_axis_est_tready, + output wire [WIDTH-1:0] m_axis_est_tdata, + output wire [0:0] m_axis_est_tdest, + output wire m_axis_est_tlast, + output wire m_axis_est_tvalid, + input wire m_axis_est_tready, + + // North inter-router connections + input wire [WIDTH-1:0] s_axis_nor_tdata, + input wire [0:0] s_axis_nor_tdest, + input wire s_axis_nor_tlast, + input wire s_axis_nor_tvalid, + output wire s_axis_nor_tready, + output wire [WIDTH-1:0] m_axis_nor_tdata, + output wire [0:0] m_axis_nor_tdest, + output wire m_axis_nor_tlast, + output wire m_axis_nor_tvalid, + input wire m_axis_nor_tready, + + // South inter-router connections + input wire [WIDTH-1:0] s_axis_sou_tdata, + input wire [0:0] s_axis_sou_tdest, + input wire s_axis_sou_tlast, + input wire s_axis_sou_tvalid, + output wire s_axis_sou_tready, + output wire [WIDTH-1:0] m_axis_sou_tdata, + output wire [0:0] m_axis_sou_tdest, + output wire m_axis_sou_tlast, + output wire m_axis_sou_tvalid, + input wire m_axis_sou_tready +); + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + `include "mesh_node_mapping.vh" + + function [2:0] term_route; + input [WIDTH-1:0] header; + reg 
[$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + term_route = 3'd0; //TER + end else if (xdst == XB_ADDR_X) begin + if (ydiff < 0) + term_route = 3'd3; //NOR + else + term_route = 3'd4; //SOU + end else begin + if (xdiff < 0) + term_route = 3'd1; //WST + else + term_route = 3'd2; //EST + end + end + endfunction + + function [1:0] xdim_route; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + xdim_route = 2'd0; //TER + end else if (xdst == XB_ADDR_X) begin + if (ydiff < 0) + xdim_route = 2'd2; //NOR + else + xdim_route = 2'd3; //SOU + end else begin + xdim_route = 2'd1; //Forward + end + end + endfunction + + function [0:0] ydim_route; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + ydim_route = 1'd0; //TER + end else if (xdst == XB_ADDR_X) begin + ydim_route = 1'd1; //Forward + end + end + endfunction + + + // ------------------------------------------------- + // Input buffers + // ------------------------------------------------- + wire [WIDTH-1:0] ter_i_tdata; + wire ter_i_tlast; + wire ter_i_tvalid; + wire ter_i_tready; + + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_ter_tdata), + .i_tlast 
(s_axis_ter_tlast), + .i_tvalid (s_axis_ter_tvalid), + .i_tready (s_axis_ter_tready), + .i_terror (1'b0), + .o_tdata (ter_i_tdata), + .o_tlast (ter_i_tlast), + .o_tvalid (ter_i_tvalid), + .o_tready (ter_i_tready) + ); + + wire [WIDTH-1:0] wst_i_tdata, est_i_tdata, nor_i_tdata, sou_i_tdata; + wire wst_i_tlast, est_i_tlast, nor_i_tlast, sou_i_tlast; + wire wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid; + wire wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready; + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) wst_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_wst_tdata), + .s_axis_tdest (s_axis_wst_tdest), + .s_axis_tlast (s_axis_wst_tlast), + .s_axis_tvalid (s_axis_wst_tvalid), + .s_axis_tready (s_axis_wst_tready), + .m_axis_tdata (wst_i_tdata), + .m_axis_tlast (wst_i_tlast), + .m_axis_tvalid (wst_i_tvalid), + .m_axis_tready (wst_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) est_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_est_tdata), + .s_axis_tdest (s_axis_est_tdest), + .s_axis_tlast (s_axis_est_tlast), + .s_axis_tvalid (s_axis_est_tvalid), + .s_axis_tready (s_axis_est_tready), + .m_axis_tdata (est_i_tdata), + .m_axis_tlast (est_i_tlast), + .m_axis_tvalid (est_i_tvalid), + .m_axis_tready (est_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) nor_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_nor_tdata), + .s_axis_tdest (s_axis_nor_tdest), + .s_axis_tlast (s_axis_nor_tlast), + .s_axis_tvalid (s_axis_nor_tvalid), + .s_axis_tready (s_axis_nor_tready), + .m_axis_tdata (nor_i_tdata), + .m_axis_tlast (nor_i_tlast), + .m_axis_tvalid (nor_i_tvalid), + .m_axis_tready (nor_i_tready) + ); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(1), + .SIZE(XB_BUFF_SIZE), + 
.ROUTING(ROUTING_ALLOC) + ) sou_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_sou_tdata), + .s_axis_tdest (s_axis_sou_tdest), + .s_axis_tlast (s_axis_sou_tlast), + .s_axis_tvalid (s_axis_sou_tvalid), + .s_axis_tready (s_axis_sou_tready), + .m_axis_tdata (sou_i_tdata), + .m_axis_tlast (sou_i_tlast), + .m_axis_tvalid (sou_i_tvalid), + .m_axis_tready (sou_i_tready) + ); + + // ------------------------------------------------- + // Input demuxes + // ------------------------------------------------- + + wire [WIDTH-1:0] t2t_tdata, t2w_tdata, t2e_tdata, t2n_tdata, t2s_tdata; + wire t2t_tlast, t2w_tlast, t2e_tlast, t2n_tlast, t2s_tlast; + wire t2t_tvalid, t2w_tvalid, t2e_tvalid, t2n_tvalid, t2s_tvalid; + wire t2t_tready, t2w_tready, t2e_tready, t2n_tready, t2s_tready; + + wire [WIDTH-1:0] w2t_tdata, w2e_tdata, w2n_tdata, w2s_tdata; + wire w2t_tlast, w2e_tlast, w2n_tlast, w2s_tlast; + wire w2t_tvalid, w2e_tvalid, w2n_tvalid, w2s_tvalid; + wire w2t_tready, w2e_tready, w2n_tready, w2s_tready; + + wire [WIDTH-1:0] e2t_tdata, e2w_tdata, e2n_tdata, e2s_tdata; + wire e2t_tlast, e2w_tlast, e2n_tlast, e2s_tlast; + wire e2t_tvalid, e2w_tvalid, e2n_tvalid, e2s_tvalid; + wire e2t_tready, e2w_tready, e2n_tready, e2s_tready; + + wire [WIDTH-1:0] n2t_tdata, n2s_tdata; + wire n2t_tlast, n2s_tlast; + wire n2t_tvalid, n2s_tvalid; + wire n2t_tready, n2s_tready; + + wire [WIDTH-1:0] s2t_tdata, s2n_tdata; + wire s2t_tlast, s2n_tlast; + wire s2t_tvalid, s2n_tvalid; + wire s2t_tready, s2n_tready; + + wire [WIDTH-1:0] ter_i_hdr, wst_i_hdr, est_i_hdr, nor_i_hdr, sou_i_hdr; + + axi_demux #( + .WIDTH(WIDTH), .SIZE(5), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) ter_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (ter_i_hdr), + .dest (term_route(ter_i_hdr)), + .i_tdata (ter_i_tdata), + .i_tlast (ter_i_tlast), + .i_tvalid (ter_i_tvalid), + .i_tready (ter_i_tready), + .o_tdata ({t2s_tdata, t2n_tdata, t2e_tdata, t2w_tdata, t2t_tdata}), + 
.o_tlast ({t2s_tlast, t2n_tlast, t2e_tlast, t2w_tlast, t2t_tlast}), + .o_tvalid ({t2s_tvalid, t2n_tvalid, t2e_tvalid, t2w_tvalid, t2t_tvalid}), + .o_tready ({t2s_tready, t2n_tready, t2e_tready, t2w_tready, t2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) wst_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (wst_i_hdr), + .dest (xdim_route(wst_i_hdr)), + .i_tdata (wst_i_tdata), + .i_tlast (wst_i_tlast), + .i_tvalid (wst_i_tvalid), + .i_tready (wst_i_tready), + .o_tdata ({w2s_tdata, w2n_tdata, w2e_tdata, w2t_tdata}), + .o_tlast ({w2s_tlast, w2n_tlast, w2e_tlast, w2t_tlast}), + .o_tvalid ({w2s_tvalid, w2n_tvalid, w2e_tvalid, w2t_tvalid}), + .o_tready ({w2s_tready, w2n_tready, w2e_tready, w2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(4), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) est_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (est_i_hdr), + .dest (xdim_route(est_i_hdr)), + .i_tdata (est_i_tdata), + .i_tlast (est_i_tlast), + .i_tvalid (est_i_tvalid), + .i_tready (est_i_tready), + .o_tdata ({e2s_tdata, e2n_tdata, e2w_tdata, e2t_tdata}), + .o_tlast ({e2s_tlast, e2n_tlast, e2w_tlast, e2t_tlast}), + .o_tvalid ({e2s_tvalid, e2n_tvalid, e2w_tvalid, e2t_tvalid}), + .o_tready ({e2s_tready, e2n_tready, e2w_tready, e2t_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) nor_i_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (nor_i_hdr), + .dest (ydim_route(nor_i_hdr)), + .i_tdata (nor_i_tdata), + .i_tlast (nor_i_tlast), + .i_tvalid (nor_i_tvalid), + .i_tready (nor_i_tready), + .o_tdata ({n2t_tdata, n2s_tdata}), + .o_tlast ({n2t_tlast, n2s_tlast}), + .o_tvalid ({n2t_tvalid, n2s_tvalid}), + .o_tready ({n2t_tready, n2s_tready}) + ); + + axi_demux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) sou_i_demux_i 
( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (sou_i_hdr), + .dest (ydim_route(sou_i_hdr)), + .i_tdata (sou_i_tdata), + .i_tlast (sou_i_tlast), + .i_tvalid (sou_i_tvalid), + .i_tready (sou_i_tready), + .o_tdata ({s2t_tdata, s2n_tdata}), + .o_tlast ({s2t_tlast, s2n_tlast}), + .o_tvalid ({s2t_tvalid, s2n_tvalid}), + .o_tready ({s2t_tready, s2n_tready}) + ); + + // ------------------------------------------------- + // Output muxes + // ------------------------------------------------- + + axi_mux #( + .WIDTH(WIDTH), .SIZE(5), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) ter_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2t_tdata, w2t_tdata, e2t_tdata, n2t_tdata, s2t_tdata}), + .i_tlast ({t2t_tlast, w2t_tlast, e2t_tlast, n2t_tlast, s2t_tlast}), + .i_tvalid ({t2t_tvalid, w2t_tvalid, e2t_tvalid, n2t_tvalid, s2t_tvalid}), + .i_tready ({t2t_tready, w2t_tready, e2t_tready, n2t_tready, s2t_tready}), + .o_tdata (m_axis_ter_tdata), + .o_tlast (m_axis_ter_tlast), + .o_tvalid (m_axis_ter_tvalid), + .o_tready (m_axis_ter_tready) + ); + + axi_mux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) wst_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2w_tdata, e2w_tdata}), + .i_tlast ({t2w_tlast, e2w_tlast}), + .i_tvalid ({t2w_tvalid, e2w_tvalid}), + .i_tready ({t2w_tready, e2w_tready}), + .o_tdata (m_axis_wst_tdata), + .o_tlast (m_axis_wst_tlast), + .o_tvalid (m_axis_wst_tvalid), + .o_tready (m_axis_wst_tready) + ); + assign m_axis_wst_tdest = 1'b0; + + axi_mux #( + .WIDTH(WIDTH), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) est_o_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2e_tdata, w2e_tdata}), + .i_tlast ({t2e_tlast, w2e_tlast}), + .i_tvalid ({t2e_tvalid, w2e_tvalid}), + .i_tready ({t2e_tready, w2e_tready}), + .o_tdata (m_axis_est_tdata), + .o_tlast (m_axis_est_tlast), + .o_tvalid (m_axis_est_tvalid), + + + .o_tready (m_axis_est_tready) + ); + assign 
m_axis_est_tdest = 1'b0;
+
+  axi_mux #(
+    .WIDTH(WIDTH), .SIZE(4),
+    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+  ) nor_o_mux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({t2n_tdata, w2n_tdata, e2n_tdata, s2n_tdata}),
+    .i_tlast  ({t2n_tlast, w2n_tlast, e2n_tlast, s2n_tlast}),
+    .i_tvalid ({t2n_tvalid, w2n_tvalid, e2n_tvalid, s2n_tvalid}),
+    .i_tready ({t2n_tready, w2n_tready, e2n_tready, s2n_tready}),
+    .o_tdata  (m_axis_nor_tdata),
+    .o_tlast  (m_axis_nor_tlast),
+    .o_tvalid (m_axis_nor_tvalid),
+    .o_tready (m_axis_nor_tready)
+  );
+  assign m_axis_nor_tdest = 1'b0;
+
+  axi_mux #(
+    .WIDTH(WIDTH), .SIZE(4),
+    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+  ) sou_o_mux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({t2s_tdata, w2s_tdata, e2s_tdata, n2s_tdata}),
+    .i_tlast  ({t2s_tlast, w2s_tlast, e2s_tlast, n2s_tlast}),
+    .i_tvalid ({t2s_tvalid, w2s_tvalid, e2s_tvalid, n2s_tvalid}),
+    .i_tready ({t2s_tready, w2s_tready, e2s_tready, n2s_tready}),
+    .o_tdata  (m_axis_sou_tdata),
+    .o_tlast  (m_axis_sou_tlast),
+    .o_tvalid (m_axis_sou_tvalid),
+    .o_tready (m_axis_sou_tready)
+  );
+  assign m_axis_sou_tdest = 1'b0;
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
new file mode 100644
index 000000000..65cded545
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
@@ -0,0 +1,398 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: mesh_2d_dor_router_single_sw
+// Description:
+//   This module implements the router for a 2-dimensional (2d)
+//   mesh network that uses dimension order routing (dor) and has a
+//   single underlying switch (single_sw). It uses AXI-Stream for all of its
+//   links.
+//   The mesh topology, routing algorithms and the router architecture is
+//   described in README.md in this directory.
+// Parameters:
+//   - WIDTH: Width of the AXI-Stream data bus
+//   - DIM_SIZE: Number of routers along one dimension
+//   - XB_ADDR_X: The X-coordinate of this router in the topology
+//   - XB_ADDR_Y: The Y-coordinate of this router in the topology
+//   - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+//   - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+//   - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+//     * WORMHOLE: Allocate route as soon as first word in pkt arrives
+//     * CUT-THROUGH: Allocate route only after the full pkt arrives
+//   - SWITCH_ALLOC: Algorithm to allocate the switch
+//     * PRIO: Priority based. Priority: South > East > North > West > Term
+//     * ROUND-ROBIN: Round robin input port allocation
+// Signals:
+//   - *_axis_ter_*: Terminal ports (master/slave)
+//   - *_axis_wst_*: Inter-router X-dim west connections (master/slave)
+//   - *_axis_est_*: Inter-router X-dim east connections (master/slave)
+//   - *_axis_nor_*: Inter-router Y-dim north connections (master/slave)
+//   - *_axis_sou_*: Inter-router Y-dim south connections (master/slave)
+//
+
+module mesh_2d_dor_router_single_sw #(
+  parameter                        WIDTH          = 64,
+  parameter                        DIM_SIZE       = 4,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X      = 0,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y      = 0,
+  parameter                        TERM_BUFF_SIZE = 5,
+  parameter                        XB_BUFF_SIZE   = 5,
+  parameter                        ROUTING_ALLOC  = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}
+  parameter                        SWITCH_ALLOC   = "PRIO"      // Switch allocation algorithm {ROUND-ROBIN, PRIO}
+) (
+  // Clocks and resets
+  input  wire             clk,
+  input  wire             reset,
+
+  // Terminal connections
+  input  wire [WIDTH-1:0] s_axis_ter_tdata,
+  input  wire             s_axis_ter_tlast,
+  input  wire             s_axis_ter_tvalid,
+  output wire             s_axis_ter_tready,
+  output wire [WIDTH-1:0] m_axis_ter_tdata,
+  output wire             m_axis_ter_tlast,
+  output wire             m_axis_ter_tvalid,
+  input  wire             m_axis_ter_tready,
+
+  // West inter-router connections
+  input  wire [WIDTH-1:0] s_axis_wst_tdata,
+  input  wire [0:0]       s_axis_wst_tdest,
+  input  wire             s_axis_wst_tlast,
+  input  wire             s_axis_wst_tvalid,
+  output wire             s_axis_wst_tready,
+  output wire [WIDTH-1:0] m_axis_wst_tdata,
+  output wire [0:0]       m_axis_wst_tdest,
+  output wire             m_axis_wst_tlast,
+  output wire             m_axis_wst_tvalid,
+  input  wire             m_axis_wst_tready,
+
+  // East inter-router connections
+  input  wire [WIDTH-1:0] s_axis_est_tdata,
+  input  wire [0:0]       s_axis_est_tdest,
+  input  wire             s_axis_est_tlast,
+  input  wire             s_axis_est_tvalid,
+  output wire             s_axis_est_tready,
+  output wire [WIDTH-1:0] m_axis_est_tdata,
+  output wire [0:0]       m_axis_est_tdest,
+  output wire             m_axis_est_tlast,
+  output wire             m_axis_est_tvalid,
+  input  wire             m_axis_est_tready,
+
+  // North inter-router connections
+  input  wire [WIDTH-1:0] s_axis_nor_tdata,
+  input  wire [0:0]       s_axis_nor_tdest,
+  input  wire             s_axis_nor_tlast,
+  input  wire             s_axis_nor_tvalid,
+  output wire             s_axis_nor_tready,
+  output wire [WIDTH-1:0] m_axis_nor_tdata,
+  output wire [0:0]       m_axis_nor_tdest,
+  output wire             m_axis_nor_tlast,
+  output wire             m_axis_nor_tvalid,
+  input  wire             m_axis_nor_tready,
+
+  // South inter-router connections
+  input  wire [WIDTH-1:0] s_axis_sou_tdata,
+  input  wire [0:0]       s_axis_sou_tdest,
+  input  wire             s_axis_sou_tlast,
+  input  wire             s_axis_sou_tvalid,
+  output wire             s_axis_sou_tready,
+  output wire [WIDTH-1:0] m_axis_sou_tdata,
+  output wire [0:0]       m_axis_sou_tdest,
+  output wire             m_axis_sou_tlast,
+  output wire             m_axis_sou_tvalid,
+  input  wire             m_axis_sou_tready
+);
+  // -------------------------------------------------
+  // Routing functions
+  // -------------------------------------------------
+
+  // mesh_node_mapping.vh file contains the mapping between the node number
+  // and its XY coordinates. It is autogenerated and defines the node_to_xdst
+  // and node_to_ydst functions.
+  `include "mesh_node_mapping.vh"
+
+  // Switch port codes. The same numbering is used as the bit position of a
+  // port in every concatenated {sou, nor, est, wst, ter} vector below.
+  localparam [2:0] SW_DEST_TER  = 3'd0;
+  localparam [2:0] SW_DEST_WST  = 3'd1;
+  localparam [2:0] SW_DEST_EST  = 3'd2;
+  localparam [2:0] SW_DEST_NOR  = 3'd3;
+  localparam [2:0] SW_DEST_SOU  = 3'd4;
+  localparam [2:0] SW_NUM_DESTS = 3'd5;
+
+  // The compute_switch_tdest function is the destination selector
+  // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the
+  // first word of a packet and determine the destination of the packet.
+  //   header: first word of the packet (passed to node_to_xdst/node_to_ydst)
+  //   src:    SW_DEST_* code of the port the packet arrived on
+  //   return: {VC, SW_DEST_* code of the egress port}
+  function [3:0] compute_switch_tdest;
+    input [WIDTH-1:0] header;
+    input [3:0]       src;
+    reg [$clog2(DIM_SIZE)-1:0]      xdst, ydst;
+    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
+    reg                             sel_vc;
+    reg [2:0]                       sel_dest;
+    begin
+      xdst  = node_to_xdst(header);
+      ydst  = node_to_ydst(header);
+      // One extra (sign) bit so that the difference can go negative
+      xdiff = xdst - XB_ADDR_X;
+      ydiff = ydst - XB_ADDR_Y;
+      // Routing logic
+      // - MSB is the VC, 3 LSBs are the router destination
+      // - X is resolved first, Y is only considered once xdiff is zero
+      // - VC is 0 except for the north/south turns flagged below
+      if (xdiff == 'd0 && ydiff == 'd0) begin
+        // VC=0 because terminals don't have VCs
+        sel_vc   = 1'b0;
+        sel_dest = SW_DEST_TER;
+      end else if (xdiff == 'd0) begin
+        // VC=1 for CCW turns and VC=0 for everything else
+        if (ydiff < 0) begin
+          sel_vc   = (src == SW_DEST_WST);
+          sel_dest = SW_DEST_NOR;
+        end else begin
+          sel_vc   = (src == SW_DEST_EST);
+          sel_dest = SW_DEST_SOU;
+        end
+      end else begin
+        // VC=0 because east-west paths don't have VCs
+        sel_vc = 1'b0;
+        if (xdiff < 0)
+          sel_dest = SW_DEST_WST;
+        else
+          sel_dest = SW_DEST_EST;
+      end
+      compute_switch_tdest = {sel_vc, sel_dest};
+      // Simulation-only sanity checks: report a route that would leave the
+      // mesh. The reduction-XOR is X whenever any coordinate bit is unknown,
+      // so "!== 1'bx" skips the check for undefined headers (a plain
+      // "!= 'hx" always evaluates to X and would disable the checks).
+      // Only the direction field is compared, so a set VC bit cannot hide
+      // an illegal north/south route.
+      if ((^xdst) !== 1'bx && (^ydst) !== 1'bx) begin
+        if (XB_ADDR_X == 0 && sel_dest == SW_DEST_WST)
+          $display("Illegal route chosen: WEST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_X == DIM_SIZE-1 && sel_dest == SW_DEST_EST)
+          $display("Illegal route chosen: EAST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_Y == 0 && sel_dest == SW_DEST_NOR)
+          $display("Illegal route chosen: NORTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_Y == DIM_SIZE-1 && sel_dest == SW_DEST_SOU)
+          $display("Illegal route chosen: SOUTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+      end
+      //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest);
+    end
+  endfunction
+
+  // The compute_switch_alloc function is the switch allocation function
+  // i.e. it chooses which input port reserves the switch for packet transfer.
+  // After the switch is allocated, all other ports will be backpressured until
+  // the packet finishes transferring.
+  //   pkt_waiting: one bit per input port, indexed by SW_DEST_* code
+  //   last_alloc:  SW_DEST_* code of the previously served port
+  //   return:      SW_DEST_* code of the port that gets the switch
+  function [2:0] compute_switch_alloc;
+    input [4:0] pkt_waiting;
+    input [2:0] last_alloc;
+    reg   [3:0] rr_base;
+    reg   [2:0] rr_1, rr_2, rr_3, rr_4;
+    begin
+      // Zero or one requester: the answer does not depend on the algorithm
+      if (pkt_waiting == 5'b00000) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00001) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00010) begin
+        compute_switch_alloc = SW_DEST_WST;
+      end else if (pkt_waiting == 5'b00100) begin
+        compute_switch_alloc = SW_DEST_EST;
+      end else if (pkt_waiting == 5'b01000) begin
+        compute_switch_alloc = SW_DEST_NOR;
+      end else if (pkt_waiting == 5'b10000) begin
+        compute_switch_alloc = SW_DEST_SOU;
+      end else begin
+        if (SWITCH_ALLOC == "PRIO") begin
+          // Priority: South > East > North > West > Term
+          if (pkt_waiting[SW_DEST_SOU])
+            compute_switch_alloc = SW_DEST_SOU;
+          else if (pkt_waiting[SW_DEST_EST])
+            compute_switch_alloc = SW_DEST_EST;
+          else if (pkt_waiting[SW_DEST_NOR])
+            compute_switch_alloc = SW_DEST_NOR;
+          else if (pkt_waiting[SW_DEST_WST])
+            compute_switch_alloc = SW_DEST_WST;
+          else
+            compute_switch_alloc = SW_DEST_TER;
+        end else begin
+          // Round-robin: probe the four ports that follow last_alloc in
+          // cyclic order and grant the first one with a packet waiting.
+          // The sums are formed in 4 bits on purpose: last_alloc + 4 reaches
+          // 8 when last_alloc is SW_DEST_SOU, which wraps to 0 in 3-bit
+          // arithmetic and would probe TER a second time instead of NOR.
+          rr_base = {1'b0, last_alloc};
+          rr_1    = (rr_base + 4'd1) % SW_NUM_DESTS;
+          rr_2    = (rr_base + 4'd2) % SW_NUM_DESTS;
+          rr_3    = (rr_base + 4'd3) % SW_NUM_DESTS;
+          rr_4    = (rr_base + 4'd4) % SW_NUM_DESTS;
+          if (pkt_waiting[rr_1])
+            compute_switch_alloc = rr_1;
+          else if (pkt_waiting[rr_2])
+            compute_switch_alloc = rr_2;
+          else if (pkt_waiting[rr_3])
+            compute_switch_alloc = rr_3;
+          else if (pkt_waiting[rr_4])
+            compute_switch_alloc = rr_4;
+          else
+            compute_switch_alloc = last_alloc;
+        end
+      end
+      //$display("pkt_waiting=%b, alloc=%d, last_alloc=%d", pkt_waiting, compute_switch_alloc, last_alloc);
+    end
+  endfunction
+
+  // -------------------------------------------------
+  // Input buffers
+  // -------------------------------------------------
+  wire [WIDTH-1:0] ter_i_tdata;
+  wire [3:0]       ter_i_tdest;
+  wire             ter_i_tlast;
+  wire             ter_i_tvalid;
+  wire             ter_i_tready;
+
+  // Data coming in from the terminal is gated until a full packet arrives
+  // in order to minimize the switch allocation time per packet.
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+  ) term_in_pkt_gate_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  (s_axis_ter_tdata),
+    .i_tlast  (s_axis_ter_tlast),
+    .i_tvalid (s_axis_ter_tvalid),
+    .i_tready (s_axis_ter_tready),
+    .i_terror (1'b0),
+    .o_tdata  (ter_i_tdata),
+    .o_tlast  (ter_i_tlast),
+    .o_tvalid (ter_i_tvalid),
+    .o_tready (ter_i_tready)
+  );
+  assign ter_i_tdest = compute_switch_tdest(ter_i_tdata, SW_DEST_TER);
+
+  wire [WIDTH-1:0] wst_i_tdata,  est_i_tdata,  nor_i_tdata,  sou_i_tdata;
+  wire [3:0]       wst_i_tdest,  est_i_tdest,  nor_i_tdest,  sou_i_tdest;
+  wire             wst_i_tlast,  est_i_tlast,  nor_i_tlast,  sou_i_tlast;
+  wire             wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
+  wire             wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(1),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) wst_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_wst_tdata),
+    .s_axis_tdest  (s_axis_wst_tdest),
+    .s_axis_tlast  (s_axis_wst_tlast),
+    .s_axis_tvalid (s_axis_wst_tvalid),
+    .s_axis_tready (s_axis_wst_tready),
+    .m_axis_tdata  (wst_i_tdata),
+    .m_axis_tlast  (wst_i_tlast),
+    .m_axis_tvalid (wst_i_tvalid),
+    .m_axis_tready (wst_i_tready)
+  );
+  assign wst_i_tdest = compute_switch_tdest(wst_i_tdata, SW_DEST_WST);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(1),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) est_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_est_tdata),
+    .s_axis_tdest  (s_axis_est_tdest),
+    .s_axis_tlast  (s_axis_est_tlast),
+    .s_axis_tvalid (s_axis_est_tvalid),
+    .s_axis_tready (s_axis_est_tready),
+    .m_axis_tdata  (est_i_tdata),
+    .m_axis_tlast  (est_i_tlast),
+    .m_axis_tvalid (est_i_tvalid),
+    .m_axis_tready (est_i_tready)
+  );
+  assign est_i_tdest = compute_switch_tdest(est_i_tdata, SW_DEST_EST);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) nor_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_nor_tdata),
+    .s_axis_tdest  (s_axis_nor_tdest),
+    .s_axis_tlast  (s_axis_nor_tlast),
+    .s_axis_tvalid (s_axis_nor_tvalid),
+    .s_axis_tready (s_axis_nor_tready),
+    .m_axis_tdata  (nor_i_tdata),
+    .m_axis_tlast  (nor_i_tlast),
+    .m_axis_tvalid (nor_i_tvalid),
+    .m_axis_tready (nor_i_tready)
+  );
+  assign nor_i_tdest = compute_switch_tdest(nor_i_tdata, SW_DEST_NOR);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) sou_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_sou_tdata),
+    .s_axis_tdest  (s_axis_sou_tdest),
+    .s_axis_tlast  (s_axis_sou_tlast),
+    .s_axis_tvalid (s_axis_sou_tvalid),
+    .s_axis_tready (s_axis_sou_tready),
+    .m_axis_tdata  (sou_i_tdata),
+    .m_axis_tlast  (sou_i_tlast),
+    .m_axis_tvalid (sou_i_tvalid),
+    .m_axis_tready (sou_i_tready)
+  );
+  assign sou_i_tdest = compute_switch_tdest(sou_i_tdata, SW_DEST_SOU);
+
+  //-------------------------------------------------
+  // Switch
+  //-------------------------------------------------
+  // Track the input packet state
+  localparam [0:0] PKT_ST_HEAD = 1'b0;
+  localparam [0:0] PKT_ST_BODY = 1'b1;
+  reg [0:0] pkt_state = PKT_ST_HEAD;
+
+  // The switch only accepts packets on a single port at a time.
+  wire sw_in_ready = |({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready});
+  wire sw_in_valid = |({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid});
+  wire sw_in_last  = |({sou_i_tlast & sou_i_tvalid, nor_i_tlast & nor_i_tvalid,
+                        est_i_tlast & est_i_tvalid, wst_i_tlast & wst_i_tvalid,
+                        ter_i_tlast & ter_i_tvalid});
+
+  // HEAD until the first word of a packet is transferred, then BODY until
+  // a word flagged tlast is transferred.
+  always @(posedge clk) begin
+    if (reset) begin
+      pkt_state <= PKT_ST_HEAD;
+    end else if (sw_in_valid & sw_in_ready) begin
+      pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
+    end
+  end
+
+  // The switch requires the allocation to stay valid until the
+  // end of the packet. We also might need to keep the previous
+  // packet's allocation to compute the current one
+  wire [2:0] switch_alloc;
+  reg  [2:0] prev_switch_alloc = SW_DEST_TER;
+  reg  [2:0] pkt_switch_alloc  = SW_DEST_TER;
+
+  always @(posedge clk) begin
+    if (reset) begin
+      prev_switch_alloc <= SW_DEST_TER;
+      pkt_switch_alloc  <= SW_DEST_TER;
+    end else if (sw_in_valid & sw_in_ready) begin
+      // Latch the grant on the first word so it can be held for the body
+      if (pkt_state == PKT_ST_HEAD)
+        pkt_switch_alloc <= switch_alloc;
+      // Remember who was served last; this is last_alloc for the next grant
+      if (sw_in_last)
+        prev_switch_alloc <= switch_alloc;
+    end
+  end
+
+  // A new grant is computed only while waiting for a packet head; during the
+  // body the latched grant is replayed.
+  assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
+    compute_switch_alloc({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}, prev_switch_alloc) :
+    pkt_switch_alloc;
+
+  // The terminal port has no tdest output, so the switch's VC bit for it is
+  // left unconnected.
+  wire ter_tdest_discard;
+  axis_switch #(
+    .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(5), .OUT_PORTS(5)
+  ) switch_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  ({sou_i_tdata , nor_i_tdata , est_i_tdata , wst_i_tdata , ter_i_tdata }),
+    .s_axis_tdest  ({sou_i_tdest , nor_i_tdest , est_i_tdest , wst_i_tdest , ter_i_tdest }),
+    .s_axis_tlast  ({sou_i_tlast , nor_i_tlast , est_i_tlast , wst_i_tlast , ter_i_tlast }),
+    .s_axis_tvalid ({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}),
+    .s_axis_tready ({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready}),
+    .s_axis_alloc  (switch_alloc),
+    .m_axis_tdata  ({m_axis_sou_tdata, m_axis_nor_tdata, m_axis_est_tdata, m_axis_wst_tdata, m_axis_ter_tdata }),
+    .m_axis_tdest  ({m_axis_sou_tdest, m_axis_nor_tdest, m_axis_est_tdest, m_axis_wst_tdest, ter_tdest_discard}),
+    .m_axis_tlast  ({m_axis_sou_tlast, m_axis_nor_tlast, m_axis_est_tlast, m_axis_wst_tlast, m_axis_ter_tlast }),
+    .m_axis_tvalid ({m_axis_sou_tvalid, m_axis_nor_tvalid, m_axis_est_tvalid, m_axis_wst_tvalid, m_axis_ter_tvalid}),
+    .m_axis_tready ({m_axis_sou_tready, m_axis_nor_tready, m_axis_est_tready, m_axis_wst_tready, m_axis_ter_tready})
+  );
+
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
new file mode 100644
index 000000000..466b0c615
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
@@ -0,0 +1,294 @@
+// Copyright 2018 Ettus Research, A National Instruments Company
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Autogenerated file. Do not modify.
+// $ ./gen_node_to_coord_mapping.py --dimsize 16 --pattern spiral + +parameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround + + +// DIM_SIZE = 2, PATTERN = SPIRAL +//------------------------------------ +// 0 1 +// 3 2 +localparam [3:0] XCOORD_DIM_002 = {1'd0,1'd1,1'd1,1'd0}; +localparam [3:0] YCOORD_DIM_002 = {1'd1,1'd1,1'd0,1'd0}; + +// DIM_SIZE = 3, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 +// 5 0 1 +// 4 3 2 +localparam [17:0] XCOORD_DIM_003 = {2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [17:0] YCOORD_DIM_003 = {2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 4, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 9 +// 5 0 1 10 +// 4 3 2 11 +// 15 14 13 12 +localparam [31:0] XCOORD_DIM_004 = {2'd0,2'd1,2'd2,2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [31:0] YCOORD_DIM_004 = {2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 5, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 +// 19 6 7 8 9 +// 18 5 0 1 10 +// 17 4 3 2 11 +// 16 15 14 13 12 +localparam [74:0] XCOORD_DIM_005 = {3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [74:0] YCOORD_DIM_005 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 6, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 25 +// 19 6 7 8 9 26 +// 18 5 0 1 10 27 +// 17 4 3 2 11 28 +// 16 15 14 13 12 29 +// 35 34 33 32 31 30 +localparam [107:0] XCOORD_DIM_006 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [107:0] YCOORD_DIM_006 = 
{3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 7, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 +// 41 20 21 22 23 24 25 +// 40 19 6 7 8 9 26 +// 39 18 5 0 1 10 27 +// 38 17 4 3 2 11 28 +// 37 16 15 14 13 12 29 +// 36 35 34 33 32 31 30 +localparam [146:0] XCOORD_DIM_007 = {3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [146:0] YCOORD_DIM_007 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 8, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 49 +// 41 20 21 22 23 24 25 50 +// 40 19 6 7 8 9 26 51 +// 39 18 5 0 1 10 27 52 +// 38 17 4 3 2 11 28 53 +// 37 16 15 14 13 12 29 54 +// 36 35 34 33 32 31 30 55 +// 63 62 61 60 59 58 57 56 +localparam [191:0] XCOORD_DIM_008 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [191:0] YCOORD_DIM_008 = 
{3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 9, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 +// 71 42 43 44 45 46 47 48 49 +// 70 41 20 21 22 23 24 25 50 +// 69 40 19 6 7 8 9 26 51 +// 68 39 18 5 0 1 10 27 52 +// 67 38 17 4 3 2 11 28 53 +// 66 37 16 15 14 13 12 29 54 +// 65 36 35 34 33 32 31 30 55 +// 64 63 62 61 60 59 58 57 56 +localparam [323:0] XCOORD_DIM_009 = {4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [323:0] YCOORD_DIM_009 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 10, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 81 +// 71 42 43 44 45 46 47 48 49 82 +// 70 41 20 21 22 23 24 25 50 83 +// 69 40 19 6 7 8 9 26 51 84 +// 68 39 18 5 0 1 10 27 52 85 +// 67 38 17 4 3 2 11 28 53 86 +// 66 37 16 15 14 13 12 29 54 87 +// 65 36 35 34 33 32 31 30 55 88 +// 64 63 62 61 60 59 58 57 56 89 +// 99 98 97 96 95 94 93 92 91 90 +localparam [399:0] 
XCOORD_DIM_010 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [399:0] YCOORD_DIM_010 = {4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 11, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 +// 109 72 73 74 75 76 77 78 79 80 81 +// 108 71 42 43 44 45 46 47 48 49 82 +// 107 70 41 20 21 22 23 24 25 50 83 +// 106 69 40 19 6 7 8 9 26 51 84 +// 105 68 39 18 5 0 1 10 27 52 85 +// 104 67 38 17 4 3 2 11 28 53 86 +// 103 66 37 16 15 14 13 12 29 54 87 +// 102 65 36 35 34 33 32 31 30 55 88 +// 101 64 63 62 61 60 59 58 57 56 89 +// 100 99 98 97 96 95 94 93 92 91 90 +localparam [483:0] XCOORD_DIM_011 = 
{4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [483:0] YCOORD_DIM_011 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 12, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 121 +// 109 72 73 74 75 76 77 78 79 80 81 122 +// 108 71 42 43 44 45 46 47 48 49 82 123 +// 107 70 41 20 21 22 23 24 25 50 83 124 +// 106 69 40 19 6 7 8 9 26 51 84 125 +// 105 68 39 18 5 0 1 10 27 52 85 126 +// 104 67 38 17 4 3 2 11 28 53 86 127 +// 103 66 37 16 15 14 13 12 29 54 87 128 +// 102 65 36 35 34 33 32 31 30 55 88 129 +// 101 64 63 62 61 60 59 58 57 56 89 130 +// 100 99 98 97 96 95 94 93 92 91 90 131 +// 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [575:0] XCOORD_DIM_012 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [575:0] YCOORD_DIM_012 = {4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 13, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 +// 148 103 
66 37 16 15 14 13 12 29 54 87 128 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [675:0] XCOORD_DIM_013 = {4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [675:0] YCOORD_DIM_013 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 14, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [783:0] XCOORD_DIM_014 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [783:0] YCOORD_DIM_014 = 
{4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 15, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [899:0] 
XCOORD_DIM_015 = {4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [899:0] YCOORD_DIM_015 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + +// DIM_SIZE = 16, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 226 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 227 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 228 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 229 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 230 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 231 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 232 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 233 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 234 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 235 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 236 
+// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 237 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 238 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 239 +// 255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 +localparam [1023:0] XCOORD_DIM_016 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [1023:0] YCOORD_DIM_016 = 
{4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + + +function [CLOG2_DIM_SIZE-1:0] node_to_xdst; + input [WIDTH-1:0] header; +begin + if (DIM_SIZE == 2) + node_to_xdst = XCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_xdst = XCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_xdst = XCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_xdst = XCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_xdst = XCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_xdst = XCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_xdst = 
XCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_xdst = XCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_xdst = XCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_xdst = XCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_xdst = XCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_xdst = XCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_xdst = XCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_xdst = XCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_xdst = XCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_xdst = {CLOG2_DIM_SIZE{1'd0}}; +end endfunction + +function [CLOG2_DIM_SIZE-1:0] node_to_ydst; + input [WIDTH-1:0] header; +begin + if (DIM_SIZE == 2) + node_to_ydst = YCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_ydst = YCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_ydst = YCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_ydst = YCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_ydst = YCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_ydst = YCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_ydst = YCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_ydst = YCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_ydst = YCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_ydst = YCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_ydst = YCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_ydst = YCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_ydst = YCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_ydst = YCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_ydst = 
YCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_ydst = {CLOG2_DIM_SIZE{1'd0}}; +end endfunction + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl new file mode 100644 index 000000000..39440b512 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +create_project tmp_proj -part xc7k410tffg900-3 -in_memory +add_files {axis_ctrl_crossbar_nxn_top.v ../axis_ctrl_crossbar_nxn.v ../axis_ctrl_crossbar_2d_mesh.v ../mesh_2d_dor_router_multi_sw.v ../axis_switch.v ../axis_ingress_vc_buff.v ../mesh_node_mapping.vh ../mesh_2d_dor_router_single_sw.v ../torus_2d_dor_router_single_sw.v ../torus_2d_dor_router_multi_sw.v ../axis_port_terminator.v} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top axis_ctrl_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file axis_ctrl_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file axis_ctrl_crossbar_nxn.rpt +write_checkpoint -force axis_ctrl_crossbar_nxn.dcp +close_project +exit \ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in 
b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in new file mode 100644 index 000000000..6805100b9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in @@ -0,0 +1,47 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module axis_ctrl_crossbar_nxn_top( + input clk, + input rst +); + // Router global config + localparam IMPL = "{top}"; + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam ROUTING = "{ralloc}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + (* dont_touch = "true"*) wire deadlock_detected; + + axis_ctrl_crossbar_nxn #( + .WIDTH (DWIDTH), + .NPORTS (NPORTS), + .TOPOLOGY (IMPL), + .INGRESS_BUFF_SIZE(MTU), + .ROUTER_BUFF_SIZE (MTU), + .ROUTING_ALLOC (ROUTING), + .SWITCH_ALLOC ("ROUND-ROBIN") + ) router_dut_i ( + .clk (clk), + .reset (rst), + .s_axis_tdata (s_axis_tdata ), + .s_axis_tlast (s_axis_tlast ), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + .m_axis_tdata (m_axis_tdata ), + .m_axis_tlast (m_axis_tlast ), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + .deadlock_detected(deadlock_detected) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl new file mode 100644 index 000000000..304384aee --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +create_project tmp_proj -part 
xc7k410tffg900-3 -in_memory +add_files {chdr_crossbar_nxn_top.v ../chdr_crossbar_nxn.v ../axis_switch.v ../chdr_xb_ingress_buff.v ../chdr_xb_routing_table.v ../../core/chdr_mgmt_pkt_handler.v ../../core/rfnoc_chdr_utils.vh ../../core/rfnoc_chdr_internal_utils.vh} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top chdr_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file chdr_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file chdr_crossbar_nxn.rpt +write_checkpoint -force chdr_crossbar_nxn.dcp +close_project +exit \ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in new file mode 100644 index 000000000..fbf0852a3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in @@ -0,0 +1,55 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module chdr_crossbar_nxn_top( + input clk, + input rst +); + // Router global config + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam RLUT_SIZE = {rlutsize}; + localparam OPTIMIZE = "{opt}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , 
m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + + chdr_crossbar_nxn #( + .CHDR_W (DWIDTH), + .NPORTS (NPORTS), + .DEFAULT_PORT (0), + .MTU (MTU), + .ROUTE_TBL_SIZE (RLUT_SIZE), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE (OPTIMIZE), + .NPORTS_MGMT (NPORTS), + .EXT_RTCFG_PORT (1) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (s_axis_tdata), + .s_axis_tlast (s_axis_tlast), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + // Output + .m_axis_tdata (m_axis_tdata), + .m_axis_tlast (m_axis_tlast), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + // External rtcfg port + .ext_rtcfg_stb (0), + .ext_rtcfg_addr (0), + .ext_rtcfg_data (0), + .ext_rtcfg_ack () + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py new file mode 100755 index 000000000..4ca6e07fa --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! 
/usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'axis_ctrl_crossbar_nxn' + +# Parse command line options +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--top', type=str, default='TORUS', help='Topologies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='32', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='5', help='MTU (CSV)') + parser.add_argument('--ralloc', type=str, default='WORMHOLE', help='Router allocation method (CSV)') + return parser.parse_args() + +def main(): + args = get_options() + keys = ['top', 'ports', 'dataw', 'mtu', 'ralloc'] + for top in args.top.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for ralloc in args.ralloc.strip().split(','): + # Collect parameters + transform = {'ports':ports, 'dataw':dataw, 'mtu':mtu, 'top':top, 'ralloc':ralloc} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py new file mode 100755 index 000000000..668e7a247 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! 
/usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'chdr_crossbar_nxn' + +# Parse command line options +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--opt', type=str, default='AREA', help='Optimization strategies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='64', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='10', help='MTU or Ingress buffer size (CSV)') + parser.add_argument('--rlutsize', type=str, default='6', help='Router lookup table size (CSV)') + return parser.parse_args() + +def main(): + args = get_options() + keys = ['opt', 'ports', 'dataw', 'mtu', 'rlutsize'] + for opt in args.opt.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for rlutsize in args.rlutsize.strip().split(','): + # Collect parameters + transform = {'opt':opt, 'ports':ports, 'dataw':dataw, 'mtu':mtu, 'rlutsize':rlutsize} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py new file mode 100644 index 000000000..a9801ac20 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py @@ -0,0 +1,67 @@ +#! 
/usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import sys, os +import subprocess +import re + +def synth_run(modname, keys, transform): + prefix = modname + '_' + ('_'.join(['%s%s'%(k,transform[k]) for k in keys])) + print('='*(len(prefix)+2)) + print(' %s '%(prefix)) + print('='*(len(prefix)+2)) + # Write Verilog top-level file + with open(modname + '_top.v.in', 'r') as in_file: + with open(modname + '_top.v', 'w') as out_file: + out_file.write(in_file.read().format(**transform)) + # Run Vivado + exitcode = subprocess.Popen( + 'vivado -mode tcl -source %s_top.tcl -nolog -nojou'%(modname), shell=True + ).wait() + if exitcode != 0: + raise RuntimeError('Error running vivado. Was setupenv.sh run?') + # Extract info + lut = 100.0 + reg = 100.0 + bram = 100.0 + dsp = 100.0 + fmax = 0.0 + with open(modname + '.rpt', 'r') as rpt_file: + rpt = rpt_file.readlines() + for line in rpt: + lm = re.match(r'.*Slice LUTs\*.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if lm is not None: + lut = float(lm.group(1).strip()) + rm = re.match(r'.*Slice Registers.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if rm is not None: + reg = float(rm.group(1).strip()) + bm = re.match(r'.*Block RAM Tile.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if bm is not None: + bram = float(bm.group(1).strip()) + dm = re.match(r'.*DSPs.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if dm is not None: + dsp = float(dm.group(1).strip()) + tm = re.match(r'.*clk.*\| clk\s*\|(.*)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|', line) + if tm is not None: + fmax = 1000.0/float(tm.group(1).strip()) + # Save report + os.rename(modname + '.rpt', prefix + '.rpt') + os.rename(modname + '.dcp', prefix + '.dcp') + try: + os.remove(modname + '_top.v') + os.remove('fsm_encoding.os') + except FileNotFoundError: + pass + # Write summary report line + res_keys = ['lut','reg','bram','dsp','fmax'] + res = {'lut':lut, 'reg':reg, 'bram':bram, 'dsp':dsp, 
'fmax':fmax, 'prefix':prefix} + if not os.path.exists(modname + '_summary.csv'): + with open(modname + '_summary.csv', 'w') as summaryf: + summaryf.write((','.join(keys + res_keys)) + '\n') + with open(modname + '_summary.csv', 'a') as summaryf: + summaryf.write((','.join(['%s'%(transform[k]) for k in keys])) + ',' + (','.join(['%.1f'%(res[k]) for k in res_keys])) + '\n') diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v new file mode 100644 index 000000000..cd70450a0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v @@ -0,0 +1,338 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: torus_2d_dor_router_multi_sw +// Description: +// Alternate implementation for torus_2d_dor_router_single_sw with +// multiple switches for independent paths between inputs and outputs +// **NOTE**: This module has not been validated + +module torus_2d_dor_router_multi_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE" +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_term_tdata, + input wire s_axis_term_tlast, + input wire s_axis_term_tvalid, + output wire s_axis_term_tready, + output wire [WIDTH-1:0] m_axis_term_tdata, + output wire m_axis_term_tlast, + output wire m_axis_term_tvalid, + input wire m_axis_term_tready, + + // X-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_xdim_tdata, + input wire [0:0] s_axis_xdim_tdest, + input wire s_axis_xdim_tlast, + input wire s_axis_xdim_tvalid, + output wire s_axis_xdim_tready, + output wire [WIDTH-1:0] m_axis_xdim_tdata, + output wire [0:0] 
m_axis_xdim_tdest, + output wire m_axis_xdim_tlast, + output wire m_axis_xdim_tvalid, + input wire m_axis_xdim_tready, + + // Y-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_ydim_tdata, + input wire [0:0] s_axis_ydim_tdest, + input wire s_axis_ydim_tlast, + input wire s_axis_ydim_tvalid, + output wire s_axis_ydim_tready, + output wire [WIDTH-1:0] m_axis_ydim_tdata, + output wire [0:0] m_axis_ydim_tdest, + output wire m_axis_ydim_tlast, + output wire m_axis_ydim_tvalid, + input wire m_axis_ydim_tready +); + + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + `include "mesh_node_mapping.vh" + + function [2:0] term_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + term_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + term_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out */}; + end else begin + term_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [2:0] xdim_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + xdim_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + xdim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out 
*/}; + end else begin + xdim_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [1:0] ydim_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] ydst, ydiff; + begin + ydst = node_to_ydst(header); + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, LSB is the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (ydst == XB_ADDR_Y) begin + ydim_in_route = {1'b0 /* VC don't care */, 1'd1 /* term out */}; + end else begin + ydim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 1'd0 /* ydim out */}; + end + end + endfunction + + // ------------------------------------------------- + // Input demuxes + // ------------------------------------------------- + wire [WIDTH-1:0] ti_gt_tdata; + wire ti_gt_tdest; + wire ti_gt_tlast; + wire ti_gt_tvalid; + wire ti_gt_tready; + wire [WIDTH-1:0] t2t_tdata, t2x_tdata, t2y_tdata; + wire t2t_tdest, t2x_tdest, t2y_tdest; + wire t2t_tlast, t2x_tlast, t2y_tlast; + wire t2t_tvalid, t2x_tvalid, t2y_tvalid; + wire t2t_tready, t2x_tready, t2y_tready; + wire [WIDTH-1:0] term_in_hdr; + wire [1:0] term_in_port; + + assign {ti_gt_tdest, term_in_port} = term_in_route(term_in_hdr); + + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_term_tdata), + .i_tlast (s_axis_term_tlast), + .i_tvalid (s_axis_term_tvalid), + .i_tready (s_axis_term_tready), + .i_terror (1'b0), + .o_tdata (ti_gt_tdata), + .o_tlast (ti_gt_tlast), + .o_tvalid (ti_gt_tvalid), + .o_tready (ti_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) term_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (term_in_hdr), + .dest (term_in_port), + .i_tdata ({ti_gt_tdest, ti_gt_tdata}), + .i_tlast (ti_gt_tlast), + .i_tvalid (ti_gt_tvalid), + .i_tready (ti_gt_tready), + .o_tdata ({t2t_tdest, t2t_tdata, 
t2x_tdest, t2x_tdata, t2y_tdest, t2y_tdata}), + .o_tlast ({t2t_tlast, t2x_tlast, t2y_tlast}), + .o_tvalid ({t2t_tvalid, t2x_tvalid, t2y_tvalid}), + .o_tready ({t2t_tready, t2x_tready, t2y_tready}) + ); + + wire [WIDTH-1:0] xi_gt_tdata; + wire xi_gt_tdest; + wire xi_gt_tlast; + wire xi_gt_tvalid; + wire xi_gt_tready; + wire [WIDTH-1:0] x2t_tdata, x2x_tdata, x2y_tdata; + wire x2t_tdest, x2x_tdest, x2y_tdest; + wire x2t_tlast, x2x_tlast, x2y_tlast; + wire x2t_tvalid, x2x_tvalid, x2y_tvalid; + wire x2t_tready, x2x_tready, x2y_tready; + wire [WIDTH-1:0] xdim_in_hdr; + wire [1:0] xdim_in_port; + + assign {xi_gt_tdest, xdim_in_port} = xdim_in_route(xdim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) xdim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_xdim_tdata), + .s_axis_tdest (s_axis_xdim_tdest), + .s_axis_tlast (s_axis_xdim_tlast), + .s_axis_tvalid (s_axis_xdim_tvalid), + .s_axis_tready (s_axis_xdim_tready), + .m_axis_tdata (xi_gt_tdata), + .m_axis_tlast (xi_gt_tlast), + .m_axis_tvalid (xi_gt_tvalid), + .m_axis_tready (xi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) xdim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (xdim_in_hdr), + .dest (xdim_in_port), + .i_tdata ({xi_gt_tdest, xi_gt_tdata}), + .i_tlast (xi_gt_tlast), + .i_tvalid (xi_gt_tvalid), + .i_tready (xi_gt_tready), + .o_tdata ({x2t_tdest, x2t_tdata, x2x_tdest, x2x_tdata, x2y_tdest, x2y_tdata}), + .o_tlast ({x2t_tlast, x2x_tlast, x2y_tlast}), + .o_tvalid ({x2t_tvalid, x2x_tvalid, x2y_tvalid}), + .o_tready ({x2t_tready, x2x_tready, x2y_tready}) + ); + + wire [WIDTH-1:0] yi_gt_tdata; + wire yi_gt_tdest; + wire yi_gt_tlast; + wire yi_gt_tvalid; + wire yi_gt_tready; + wire [WIDTH-1:0] y2t_tdata, y2y_tdata; + wire y2t_tdest, y2y_tdest; + wire y2t_tlast, y2y_tlast; + wire y2t_tvalid, y2y_tvalid; + wire y2t_tready, 
y2y_tready; + wire [WIDTH-1:0] ydim_in_hdr; + wire [0:0] ydim_in_port; + + assign {yi_gt_tdest, ydim_in_port} = ydim_in_route(ydim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) ydim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_ydim_tdata ), + .s_axis_tdest (s_axis_ydim_tdest ), + .s_axis_tlast (s_axis_ydim_tlast ), + .s_axis_tvalid (s_axis_ydim_tvalid), + .s_axis_tready (s_axis_ydim_tready), + .m_axis_tdata (yi_gt_tdata ), + .m_axis_tlast (yi_gt_tlast ), + .m_axis_tvalid (yi_gt_tvalid), + .m_axis_tready (yi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) ydim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (ydim_in_hdr), + .dest (ydim_in_port), + .i_tdata ({yi_gt_tdest, yi_gt_tdata}), + .i_tlast (yi_gt_tlast), + .i_tvalid (yi_gt_tvalid), + .i_tready (yi_gt_tready), + .o_tdata ({y2t_tdest, y2t_tdata, y2y_tdest, y2y_tdata}), + .o_tlast ({y2t_tlast, y2y_tlast}), + .o_tvalid ({y2t_tvalid, y2y_tvalid}), + .o_tready ({y2t_tready, y2y_tready}) + ); + + // ------------------------------------------------- + // Output muxes + // ------------------------------------------------- + wire term_tdest_discard; + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) term_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2t_tdest, t2t_tdata, x2t_tdest, x2t_tdata, y2t_tdest, y2t_tdata}), + .i_tlast ({t2t_tlast, x2t_tlast, y2t_tlast }), + .i_tvalid ({t2t_tvalid, x2t_tvalid, y2t_tvalid}), + .i_tready ({t2t_tready, x2t_tready, y2t_tready}), + .o_tdata ({term_tdest_discard, m_axis_term_tdata}), + .o_tlast (m_axis_term_tlast), + .o_tvalid (m_axis_term_tvalid), + .o_tready (m_axis_term_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) xdim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear 
(1'b0), + .i_tdata ({t2x_tdest, t2x_tdata, x2x_tdest, x2x_tdata}), + .i_tlast ({t2x_tlast, x2x_tlast}), + .i_tvalid ({t2x_tvalid, x2x_tvalid}), + .i_tready ({t2x_tready, x2x_tready}), + .o_tdata ({m_axis_xdim_tdest, m_axis_xdim_tdata}), + .o_tlast (m_axis_xdim_tlast ), + .o_tvalid (m_axis_xdim_tvalid), + .o_tready (m_axis_xdim_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) ydim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2y_tdest, t2y_tdata, x2y_tdest, x2y_tdata, y2y_tdest, y2y_tdata}), + .i_tlast ({t2y_tlast, x2y_tlast, y2y_tlast }), + .i_tvalid ({t2y_tvalid, x2y_tvalid, y2y_tvalid}), + .i_tready ({t2y_tready, x2y_tready, y2y_tready}), + .o_tdata ({m_axis_ydim_tdest, m_axis_ydim_tdata}), + .o_tlast (m_axis_ydim_tlast), + .o_tvalid (m_axis_ydim_tvalid), + .o_tready (m_axis_ydim_tready) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v new file mode 100644 index 000000000..21a66782d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v @@ -0,0 +1,294 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: torus_2d_dor_router_single_sw +// Description: +// This module implements the router for a 2-dimentional (2d) +// torus network that uses dimension order routing (dor) and has a +// single underlying switch (single_sw). It uses AXI-Stream for all of its +// links. +// The torus topology, routing algorithms and the router architecture is +// described in README.md in this directory. 
+// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers along one dimension +// - XB_ADDR_X: The X-coordinate of this router in the topology +// - XB_ADDR_Y: The Y-coordinate of this router in the topology +// - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// Signals: +// - *_axis_term_*: Terminal ports (master/slave) +// - *_axis_xdim_*: Inter-router X-dim connections (master/slave) +// - *_axis_ydim_*: Inter-router Y-dim connections (master/slave) +// + +module torus_2d_dor_router_single_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO" +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_term_tdata, + input wire s_axis_term_tlast, + input wire s_axis_term_tvalid, + output wire s_axis_term_tready, + output wire [WIDTH-1:0] m_axis_term_tdata, + output wire m_axis_term_tlast, + output wire m_axis_term_tvalid, + input wire m_axis_term_tready, + + // X-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_xdim_tdata, + input wire [0:0] s_axis_xdim_tdest, + input wire s_axis_xdim_tlast, + input wire s_axis_xdim_tvalid, + output wire s_axis_xdim_tready, + output wire [WIDTH-1:0] m_axis_xdim_tdata, + output wire [0:0] m_axis_xdim_tdest, + output 
wire m_axis_xdim_tlast, + output wire m_axis_xdim_tvalid, + input wire m_axis_xdim_tready, + + // Y-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_ydim_tdata, + input wire [0:0] s_axis_ydim_tdest, + input wire s_axis_ydim_tlast, + input wire s_axis_ydim_tvalid, + output wire s_axis_ydim_tready, + output wire [WIDTH-1:0] m_axis_ydim_tdata, + output wire [0:0] m_axis_ydim_tdest, + output wire m_axis_ydim_tlast, + output wire m_axis_ydim_tvalid, + input wire m_axis_ydim_tready +); + + //------------------------------------------------- + // Routing and switch allocation functions + //------------------------------------------------- + + // mesh_node_mapping.vh file contains the mapping between the node number + // and its XY coordinates. It is autogenerated and defines the node_to_xdst + // and node_to_ydst functions. + `include "mesh_node_mapping.vh" + + localparam [1:0] SW_DEST_TERM = 2'd0; + localparam [1:0] SW_DEST_XDIM = 2'd1; + localparam [1:0] SW_DEST_YDIM = 2'd2; + localparam [1:0] SW_NUM_DESTS = 2'd3; + + // The compute_switch_tdest function is the destination selector + // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the + // first word of a packet and determine the destination of the packet. 
+ function [2:0] compute_switch_tdest; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdiff == 'd0 && ydiff == 'd0) begin + compute_switch_tdest = {1'b0 /* VC don't care */, SW_DEST_TERM}; + end else if (xdiff != 'd0) begin + compute_switch_tdest = {(xdiff < 0), SW_DEST_XDIM}; + end else begin + compute_switch_tdest = {(ydiff < 0), SW_DEST_YDIM}; + end + //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest); + end + endfunction + + // The compute_switch_alloc function is the switch allocation function + // i.e. it chooses which input port reserves the switch for packet transfer. + // After the switch is allocated, all other ports will be backpressured until + // the packet finishes transferring. 
+ function [1:0] compute_switch_alloc; + input [2:0] pkt_waiting; + input [1:0] last_alloc; + begin + if (pkt_waiting == 3'b000) begin + compute_switch_alloc = SW_DEST_TERM; + end else if (pkt_waiting == 3'b001) begin + compute_switch_alloc = SW_DEST_TERM; + end else if (pkt_waiting == 3'b010) begin + compute_switch_alloc = SW_DEST_XDIM; + end else if (pkt_waiting == 3'b100) begin + compute_switch_alloc = SW_DEST_YDIM; + end else begin + if (SWITCH_ALLOC == "PRIO") begin + // Priority: Y-dim > X-dim > Term + if (pkt_waiting[SW_DEST_YDIM]) + compute_switch_alloc = SW_DEST_YDIM; + else if (pkt_waiting[SW_DEST_XDIM]) + compute_switch_alloc = SW_DEST_XDIM; + else + compute_switch_alloc = SW_DEST_TERM; + end else begin + // Round-robin + if (pkt_waiting[(last_alloc + 3'd1) % SW_NUM_DESTS]) + compute_switch_alloc = (last_alloc + 3'd1) % SW_NUM_DESTS; + else if (pkt_waiting[(last_alloc + 3'd2) % SW_NUM_DESTS]) + compute_switch_alloc = (last_alloc + 3'd2) % SW_NUM_DESTS; + else + compute_switch_alloc = last_alloc; + end + end + end + endfunction + + //------------------------------------------------- + // Ingress buffers + //------------------------------------------------- + wire [WIDTH-1:0] ydim_in_data , xdim_in_data , term_in_data ; + wire [2:0] ydim_in_dest , xdim_in_dest , term_in_dest ; + wire ydim_in_last , xdim_in_last , term_in_last ; + wire ydim_in_valid, xdim_in_valid, term_in_valid; + wire ydim_in_ready, xdim_in_ready, term_in_ready; + + // Data coming in from the terminal is gated until a full packet arrives + // in order to minimize the switch allocation time per packet. 
+ axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_term_tdata), + .i_tlast (s_axis_term_tlast), + .i_tvalid (s_axis_term_tvalid), + .i_tready (s_axis_term_tready), + .i_terror (1'b0), + .o_tdata (term_in_data), + .o_tlast (term_in_last), + .o_tvalid (term_in_valid), + .o_tready (term_in_ready) + ); + assign term_in_dest = compute_switch_tdest(term_in_data); + + // The XY directions have buffers with 2 virtual channels to minimize the + // possibility of a deadlock. + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) xdim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_xdim_tdata), + .s_axis_tdest (s_axis_xdim_tdest), + .s_axis_tlast (s_axis_xdim_tlast), + .s_axis_tvalid (s_axis_xdim_tvalid), + .s_axis_tready (s_axis_xdim_tready), + .m_axis_tdata (xdim_in_data), + .m_axis_tlast (xdim_in_last), + .m_axis_tvalid (xdim_in_valid), + .m_axis_tready (xdim_in_ready) + ); + assign xdim_in_dest = compute_switch_tdest(xdim_in_data); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) ydim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_ydim_tdata ), + .s_axis_tdest (s_axis_ydim_tdest ), + .s_axis_tlast (s_axis_ydim_tlast ), + .s_axis_tvalid (s_axis_ydim_tvalid), + .s_axis_tready (s_axis_ydim_tready), + .m_axis_tdata (ydim_in_data ), + .m_axis_tlast (ydim_in_last ), + .m_axis_tvalid (ydim_in_valid), + .m_axis_tready (ydim_in_ready) + ); + assign ydim_in_dest = compute_switch_tdest(ydim_in_data); + + //------------------------------------------------- + // Switch + //------------------------------------------------- + + // Track the input packet state + localparam [0:0] PKT_ST_HEAD = 1'b0; + localparam [0:0] PKT_ST_BODY = 1'b1; + reg [0:0] pkt_state = PKT_ST_HEAD; + + // The switch only accept packets on a single port at 
a time. + wire sw_in_ready = |({ydim_in_ready, xdim_in_ready, term_in_ready}); + wire sw_in_valid = |({ydim_in_valid, xdim_in_valid, term_in_valid}); + wire sw_in_last = |({ydim_in_last&ydim_in_valid, xdim_in_last&xdim_in_valid, term_in_last&term_in_valid}); + + always @(posedge clk) begin + if (reset) begin + pkt_state <= PKT_ST_HEAD; + end else if (sw_in_valid & sw_in_ready) begin + pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY; + end + end + + // The switch requires the allocation to stay valid until the + // end of the packet. We also might need to keep the previous + // packet's allocation to compute the current one + wire [1:0] switch_alloc; + reg [1:0] prev_switch_alloc = SW_DEST_TERM; + reg [1:0] pkt_switch_alloc = SW_DEST_TERM; + + always @(posedge clk) begin + if (reset) begin + prev_switch_alloc <= SW_DEST_TERM; + pkt_switch_alloc <= SW_DEST_TERM; + end else if (sw_in_valid & sw_in_ready) begin + if (pkt_state == PKT_ST_HEAD) + pkt_switch_alloc <= switch_alloc; + if (sw_in_last) + prev_switch_alloc <= switch_alloc; + end + end + + assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ? 
+ compute_switch_alloc({ydim_in_valid, xdim_in_valid, term_in_valid}, prev_switch_alloc) : + pkt_switch_alloc; + + wire term_tdest_discard; + axis_switch #( + .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(3), .OUT_PORTS(3) + ) switch_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata ({ydim_in_data , xdim_in_data , term_in_data }), + .s_axis_tdest ({ydim_in_dest , xdim_in_dest , term_in_dest }), + .s_axis_tlast ({ydim_in_last , xdim_in_last , term_in_last }), + .s_axis_tvalid ({ydim_in_valid, xdim_in_valid, term_in_valid}), + .s_axis_tready ({ydim_in_ready, xdim_in_ready, term_in_ready}), + .s_axis_alloc (switch_alloc), + .m_axis_tdata ({m_axis_ydim_tdata, m_axis_xdim_tdata, m_axis_term_tdata }), + .m_axis_tdest ({m_axis_ydim_tdest, m_axis_xdim_tdest, term_tdest_discard}), + .m_axis_tlast ({m_axis_ydim_tlast, m_axis_xdim_tlast, m_axis_term_tlast }), + .m_axis_tvalid ({m_axis_ydim_tvalid, m_axis_xdim_tvalid, m_axis_term_tvalid}), + .m_axis_tready ({m_axis_ydim_tready, m_axis_xdim_tready, m_axis_term_tready}) + ); + +endmodule + -- cgit v1.2.3