aboutsummaryrefslogtreecommitdiffstats
path: root/fpga/usrp3/lib/rfnoc/crossbar
diff options
context:
space:
mode:
Diffstat (limited to 'fpga/usrp3/lib/rfnoc/crossbar')
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs25
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/README.pdfbin0 -> 1714398 bytes
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v288
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v130
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v178
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v44
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v164
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v381
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v259
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v122
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile52
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile51
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv26
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile51
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv26
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv150
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv202
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv428
-rwxr-xr-xfpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py169
-rwxr-xr-xfpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py106
-rwxr-xr-xfpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py125
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v481
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v398
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh294
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl18
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in47
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl18
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in55
-rwxr-xr-xfpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py37
-rwxr-xr-xfpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py37
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py67
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v338
-rw-r--r--fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v294
33 files changed, 5061 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs
new file mode 100644
index 000000000..6fa49cd04
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs
@@ -0,0 +1,25 @@
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+##################################################
+# Crossbar Sources (absolute paths under $(BASE_DIR)/../lib/rfnoc/crossbar/)
+##################################################
+RFNOC_XBAR_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/crossbar/, \
+axis_ctrl_crossbar_2d_mesh.v \
+axis_ctrl_crossbar_nxn.v \
+torus_2d_dor_router_single_sw.v \
+mesh_2d_dor_router_single_sw.v \
+axis_ingress_vc_buff.v \
+axis_switch.v \
+axis_port_terminator.v \
+chdr_crossbar_nxn.v \
+chdr_xb_ingress_buff.v \
+chdr_xb_routing_table.v \
+))
+
+# Unused sources (files in this directory that are not listed in RFNOC_XBAR_SRCS)
+# torus_2d_dor_router_multi_sw.v \
+# mesh_2d_dor_router_multi_sw.v \
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/README.pdf b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf
new file mode 100644
index 000000000..838702bd1
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf
Binary files differ
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v
new file mode 100644
index 000000000..e69bdfe3c
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v
@@ -0,0 +1,288 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: axis_ctrl_crossbar_2d_mesh
+// Description:
+// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar
+// for AXIS-CTRL traffic. Supports mesh and torus topologies.
+// It uses AXI-Stream for all of its links.
+// The torus topology, routing algorithms and the router architecture are
+// described in README.md in this directory.
+// Parameters:
+// - WIDTH: Width of the AXI-Stream data bus
+// - DIM_SIZE: Number of routers along one dimension (# Nodes = DIM_SIZE * DIM_SIZE)
+// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology
+// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+// * WORMHOLE: Allocate route as soon as first word in pkt arrives
+// * CUT-THROUGH: Allocate route only after the full pkt arrives
+// - SWITCH_ALLOC: Algorithm to allocate the switch
+// * PRIO: Priority based. Priority: Y-dim > X-dim > Term
+// * ROUND-ROBIN: Round robin input port allocation
+// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected
+// Signals:
+// - s_axis_*: Slave port for router (flattened)
+// - m_axis_*: Master port for router (flattened)
+//
+
+module axis_ctrl_crossbar_2d_mesh #( // DIM_SIZE x DIM_SIZE grid of routers plus a deadlock detector
+ parameter DIM_SIZE = 4, // Routers per dimension; there are DIM_SIZE*DIM_SIZE terminal ports
+ parameter WIDTH = 64, // Width of each port's tdata
+ parameter TOPOLOGY = "MESH", // "MESH" selects the mesh router; any other value selects the torus router
+ parameter INGRESS_BUFF_SIZE = 5, // Passed to each router as TERM_BUFF_SIZE
+ parameter ROUTER_BUFF_SIZE = 5, // Passed to each router as XB_BUFF_SIZE
+ parameter ROUTING_ALLOC = "WORMHOLE", // Passed through to each router
+ parameter SWITCH_ALLOC = "PRIO", // Passed through to each router
+ parameter DEADLOCK_TIMEOUT = 16384 // Cycles; both deadlock counters are loaded with DEADLOCK_TIMEOUT-1
+) (
+ input wire clk,
+ input wire reset,
+ // Inputs (flattened: port p uses tdata[p*WIDTH +: WIDTH] and bit p of tlast/tvalid/tready)
+ input wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] s_axis_tdata,
+ input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tlast,
+ input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tvalid,
+ output wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tready,
+ // Output (flattened the same way as the inputs)
+ output wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] m_axis_tdata,
+ output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tlast,
+ output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tvalid,
+ input wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tready,
+ // Deadlock alert
+ output wire deadlock_detected // High while the deadlock recovery counter is non-zero
+);
+
+ `include "mesh_node_mapping.vh" // NOTE(review): presumably defines node_to_xdst()/node_to_ydst() used below -- confirm
+
+ //-------------------------------------------------------
+ // Unflatten input and output ports
+ //-------------------------------------------------------
+
+ wire [WIDTH-1:0] i_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Terminal inputs, indexed [y][x]
+ wire i_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire i_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire i_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ wire [WIDTH-1:0] o_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Terminal outputs, indexed [y][x]
+ wire o_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire o_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire o_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ wire clear_routers = deadlock_detected; // ORed into every router's reset below
+
+ genvar p,x,y;
+ generate
+ for (p = 0; p < DIM_SIZE*DIM_SIZE; p=p+1) begin // Map flat port index p to grid position [node_to_ydst(p)][node_to_xdst(p)]
+ assign i_tdata_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tdata[p*WIDTH +: WIDTH];
+ assign i_tlast_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tlast[p];
+ assign i_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tvalid[p];
+ assign s_axis_tready[p] = i_tready_arr[node_to_ydst(p)][node_to_xdst(p)] | clear_routers; // Force ready during recovery so the inputs are flushed
+
+ assign m_axis_tdata[p*WIDTH +: WIDTH] = o_tdata_arr[node_to_ydst(p)][node_to_xdst(p)];
+ assign m_axis_tlast[p] = o_tlast_arr [node_to_ydst(p)][node_to_xdst(p)];
+ assign m_axis_tvalid[p] = o_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] & ~clear_routers; // Mask outputs during recovery
+ assign o_tready_arr[node_to_ydst(p)][node_to_xdst(p)] = m_axis_tready[p];
+ end
+ endgenerate
+
+ //-------------------------------------------------------
+ // Instantiate routers
+ //-------------------------------------------------------
+
+ wire [WIDTH-1:0] e2w_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Link into the west/X-dim input of router [y][x]
+ wire e2w_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire e2w_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire e2w_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire e2w_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ wire [WIDTH-1:0] w2e_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Link into the east input of router [y][x] (only connected in the mesh branch)
+ wire w2e_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire w2e_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire w2e_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire w2e_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ wire [WIDTH-1:0] n2s_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Link into the south input of router [y][x] (only connected in the mesh branch)
+ wire n2s_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire n2s_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire n2s_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire n2s_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ wire [WIDTH-1:0] s2n_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; // Link into the north/Y-dim input of router [y][x]
+ wire s2n_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire s2n_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire s2n_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+ wire s2n_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1];
+
+ localparam N = DIM_SIZE; // Modulus for neighbor index arithmetic
+ localparam NEND = DIM_SIZE - 1; // Highest x/y coordinate
+ localparam [WIDTH-1:0] ZERO = {WIDTH{1'b0}}; // Tie-off value for unconnected tdata inputs
+
+ generate
+ for (y = 0; y < DIM_SIZE; y=y+1) begin: ydim
+ for (x = 0; x < DIM_SIZE; x=x+1) begin: xdim
+ if (TOPOLOGY == "MESH") begin // Mesh: four bidirectional neighbor links, tied off at the grid edges
+ mesh_2d_dor_router_single_sw #(
+ .WIDTH (WIDTH),
+ .DIM_SIZE (DIM_SIZE),
+ .XB_ADDR_X (x),
+ .XB_ADDR_Y (y),
+ .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE),
+ .XB_BUFF_SIZE (ROUTER_BUFF_SIZE),
+ .ROUTING_ALLOC (ROUTING_ALLOC),
+ .SWITCH_ALLOC (SWITCH_ALLOC)
+ ) rtr_i (
+ // Clock and reset
+ .clk (clk),
+ .reset (reset | clear_routers),
+ // Terminals
+ .s_axis_ter_tdata (i_tdata_arr [y][x]),
+ .s_axis_ter_tlast (i_tlast_arr [y][x]),
+ .s_axis_ter_tvalid (i_tvalid_arr[y][x]),
+ .s_axis_ter_tready (i_tready_arr[y][x]),
+ .m_axis_ter_tdata (o_tdata_arr [y][x]),
+ .m_axis_ter_tlast (o_tlast_arr [y][x]),
+ .m_axis_ter_tvalid (o_tvalid_arr[y][x]),
+ .m_axis_ter_tready (o_tready_arr[y][x]),
+ // West connections (at x == 0 the inputs are tied to zero and the output tready is forced high)
+ .s_axis_wst_tdata ((x != 0) ? e2w_tdata_arr [y][x] : ZERO),
+ .s_axis_wst_tdest ((x != 0) ? e2w_tdest_arr [y][x] : 1'b0),
+ .s_axis_wst_tlast ((x != 0) ? e2w_tlast_arr [y][x] : 1'b0),
+ .s_axis_wst_tvalid ((x != 0) ? e2w_tvalid_arr[y][x] : 1'b0),
+ .s_axis_wst_tready ( e2w_tready_arr[y][x] ),
+ .m_axis_wst_tdata ( w2e_tdata_arr [y][(x+N-1)%N] ),
+ .m_axis_wst_tdest ( w2e_tdest_arr [y][(x+N-1)%N] ),
+ .m_axis_wst_tlast ( w2e_tlast_arr [y][(x+N-1)%N] ),
+ .m_axis_wst_tvalid ( w2e_tvalid_arr[y][(x+N-1)%N] ),
+ .m_axis_wst_tready ((x != 0) ? w2e_tready_arr[y][(x+N-1)%N] : 1'b1),
+ // East connections (at x == NEND the inputs are tied to zero and the output tready is forced high)
+ .s_axis_est_tdata ((x != NEND) ? w2e_tdata_arr [y][x] : ZERO),
+ .s_axis_est_tdest ((x != NEND) ? w2e_tdest_arr [y][x] : 1'b0),
+ .s_axis_est_tlast ((x != NEND) ? w2e_tlast_arr [y][x] : 1'b0),
+ .s_axis_est_tvalid ((x != NEND) ? w2e_tvalid_arr[y][x] : 1'b0),
+ .s_axis_est_tready ( w2e_tready_arr[y][x] ),
+ .m_axis_est_tdata ( e2w_tdata_arr [y][(x+1)%N] ),
+ .m_axis_est_tdest ( e2w_tdest_arr [y][(x+1)%N] ),
+ .m_axis_est_tlast ( e2w_tlast_arr [y][(x+1)%N] ),
+ .m_axis_est_tvalid ( e2w_tvalid_arr[y][(x+1)%N] ),
+ .m_axis_est_tready ((x != NEND) ? e2w_tready_arr[y][(x+1)%N] : 1'b1),
+ // North connections (at y == 0 the inputs are tied to zero and the output tready is forced high)
+ .s_axis_nor_tdata ((y != 0) ? s2n_tdata_arr [y][x] : ZERO),
+ .s_axis_nor_tdest ((y != 0) ? s2n_tdest_arr [y][x] : 1'b0),
+ .s_axis_nor_tlast ((y != 0) ? s2n_tlast_arr [y][x] : 1'b0),
+ .s_axis_nor_tvalid ((y != 0) ? s2n_tvalid_arr[y][x] : 1'b0),
+ .s_axis_nor_tready ( s2n_tready_arr[y][x] ),
+ .m_axis_nor_tdata ( n2s_tdata_arr [(y+N-1)%N][x] ),
+ .m_axis_nor_tdest ( n2s_tdest_arr [(y+N-1)%N][x] ),
+ .m_axis_nor_tlast ( n2s_tlast_arr [(y+N-1)%N][x] ),
+ .m_axis_nor_tvalid ( n2s_tvalid_arr[(y+N-1)%N][x] ),
+ .m_axis_nor_tready ((y != 0) ? n2s_tready_arr[(y+N-1)%N][x] : 1'b1),
+ // South connections (at y == NEND the inputs are tied to zero and the output tready is forced high)
+ .s_axis_sou_tdata ((y != NEND) ? n2s_tdata_arr [y][x] : ZERO),
+ .s_axis_sou_tdest ((y != NEND) ? n2s_tdest_arr [y][x] : 1'b0),
+ .s_axis_sou_tlast ((y != NEND) ? n2s_tlast_arr [y][x] : 1'b0),
+ .s_axis_sou_tvalid ((y != NEND) ? n2s_tvalid_arr[y][x] : 1'b0),
+ .s_axis_sou_tready ( n2s_tready_arr[y][x] ),
+ .m_axis_sou_tdata ( s2n_tdata_arr [(y+1)%N][x] ),
+ .m_axis_sou_tdest ( s2n_tdest_arr [(y+1)%N][x] ),
+ .m_axis_sou_tlast ( s2n_tlast_arr [(y+1)%N][x] ),
+ .m_axis_sou_tvalid ( s2n_tvalid_arr[(y+1)%N][x] ),
+ .m_axis_sou_tready ((y != NEND) ? s2n_tready_arr[(y+1)%N][x] : 1'b1)
+ );
+ end else begin // Torus: one link per dimension, wrapping around via the %N index
+ torus_2d_dor_router_single_sw #(
+ .WIDTH (WIDTH),
+ .DIM_SIZE (DIM_SIZE),
+ .XB_ADDR_X (x),
+ .XB_ADDR_Y (y),
+ .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE),
+ .XB_BUFF_SIZE (ROUTER_BUFF_SIZE),
+ .ROUTING_ALLOC (ROUTING_ALLOC),
+ .SWITCH_ALLOC (SWITCH_ALLOC)
+ ) rtr_i (
+ // Clock and reset
+ .clk (clk),
+ .reset (reset | clear_routers),
+ // Terminals
+ .s_axis_term_tdata (i_tdata_arr [y][x]),
+ .s_axis_term_tlast (i_tlast_arr [y][x]),
+ .s_axis_term_tvalid (i_tvalid_arr[y][x]),
+ .s_axis_term_tready (i_tready_arr[y][x]),
+ .m_axis_term_tdata (o_tdata_arr [y][x]),
+ .m_axis_term_tlast (o_tlast_arr [y][x]),
+ .m_axis_term_tvalid (o_tvalid_arr[y][x]),
+ .m_axis_term_tready (o_tready_arr[y][x]),
+ // X-dim connections (router x drives the input of router (x+1)%N)
+ .s_axis_xdim_tdata (e2w_tdata_arr [y][x] ),
+ .s_axis_xdim_tdest (e2w_tdest_arr [y][x] ),
+ .s_axis_xdim_tlast (e2w_tlast_arr [y][x] ),
+ .s_axis_xdim_tvalid (e2w_tvalid_arr[y][x] ),
+ .s_axis_xdim_tready (e2w_tready_arr[y][x] ),
+ .m_axis_xdim_tdata (e2w_tdata_arr [y][(x+1)%N]),
+ .m_axis_xdim_tdest (e2w_tdest_arr [y][(x+1)%N]),
+ .m_axis_xdim_tlast (e2w_tlast_arr [y][(x+1)%N]),
+ .m_axis_xdim_tvalid (e2w_tvalid_arr[y][(x+1)%N]),
+ .m_axis_xdim_tready (e2w_tready_arr[y][(x+1)%N]),
+ // Y-dim connections (router y drives the input of router (y+1)%N)
+ .s_axis_ydim_tdata (s2n_tdata_arr [y][x] ),
+ .s_axis_ydim_tdest (s2n_tdest_arr [y][x] ),
+ .s_axis_ydim_tlast (s2n_tlast_arr [y][x] ),
+ .s_axis_ydim_tvalid (s2n_tvalid_arr[y][x] ),
+ .s_axis_ydim_tready (s2n_tready_arr[y][x] ),
+ .m_axis_ydim_tdata (s2n_tdata_arr [(y+1)%N][x]),
+ .m_axis_ydim_tdest (s2n_tdest_arr [(y+1)%N][x]),
+ .m_axis_ydim_tlast (s2n_tlast_arr [(y+1)%N][x]),
+ .m_axis_ydim_tvalid (s2n_tvalid_arr[(y+1)%N][x]),
+ .m_axis_ydim_tready (s2n_tready_arr[(y+1)%N][x])
+ );
+ end
+ end
+ end
+ endgenerate
+
+ //-------------------------------------------------------
+ // Deadlock detector
+ //-------------------------------------------------------
+ // A deadlock is defined on an AXIS bus as an extended period
+ // where tvalid=1 but tready=0. If at least one slave port is in
+ // this state and none of the master ports are then this router
+ // will go into a failsafe deadlock recovery mode. The DEADLOCK_TIMEOUT
+ // parameter defines the duration for which this condition has
+ // to be true. In deadlock recovery mode, all routers are held in reset
+ // (thus losing all packets in flight) and all input ports are flushed.
+
+ wire m_locked = |(m_axis_tvalid & ~m_axis_tready); // Some master port is stalled (valid without ready)
+ wire s_locked = |(s_axis_tvalid & ~s_axis_tready); // Some slave port is stalled (valid without ready)
+
+ // A counter that tracks the duration for which the router is livelocked
+ // If the livelock duration is higher than DEADLOCK_TIMEOUT then it is a
+ // deadlock
+ reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_counter = DEADLOCK_TIMEOUT-1; // Counts down only while (s_locked & ~m_locked) holds
+ always @(posedge clk) begin
+ if (reset | ~(s_locked & ~m_locked)) begin // Reload whenever the stall condition is not met
+ deadlock_counter <= DEADLOCK_TIMEOUT-1;
+ end else if (deadlock_counter != 'd0) begin // Hold at zero once the timeout has elapsed
+ deadlock_counter <= deadlock_counter - 1;
+ end
+ end
+
+ // A counter that tracks the deadlock recovery period. If the slave ports
+ // have no activity for DEADLOCK_TIMEOUT cycles then the router can
+ // successfully come out of the deadlocked state.
+ reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_recover_counter = 'd0; // Non-zero means recovery is in progress
+ always @(posedge clk) begin
+ if (reset) begin
+ deadlock_recover_counter <= 'd0;
+ end else if (deadlock_detected) begin
+ if (|s_axis_tvalid) // Any slave-side tvalid restarts the recovery period
+ deadlock_recover_counter <= DEADLOCK_TIMEOUT-1;
+ else
+ deadlock_recover_counter <= deadlock_recover_counter - 1;
+ end else if (deadlock_counter == 'd0) begin // Stall timeout elapsed: enter recovery
+ deadlock_recover_counter <= DEADLOCK_TIMEOUT-1;
+ end
+ end
+ assign deadlock_detected = (deadlock_recover_counter != 0);
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v
new file mode 100644
index 000000000..6de082b4c
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v
@@ -0,0 +1,130 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: axis_ctrl_crossbar_nxn
+// Description:
+// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar
+// for AXIS-CTRL traffic. Supports mesh and torus topologies.
+// It uses AXI-Stream for all of its links.
+// The torus topology, routing algorithms and the router architecture are
+// described in README.md in this directory.
+// Parameters:
+// - WIDTH: Width of the AXI-Stream data bus
+// - NPORTS: Number of ports (maximum 1024)
+// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology
+// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+// * WORMHOLE: Allocate route as soon as first word in pkt arrives
+// * CUT-THROUGH: Allocate route only after the full pkt arrives
+// - SWITCH_ALLOC: Algorithm to allocate the switch
+// * PRIO: Priority based. Priority: Y-dim > X-dim > Term
+// * ROUND-ROBIN: Round robin input port allocation
+// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected
+// Signals:
+// - s_axis_*: Slave port for router (flattened)
+// - m_axis_*: Master port for router (flattened)
+//
+
+module axis_ctrl_crossbar_nxn #( // Wraps axis_ctrl_crossbar_2d_mesh so that NPORTS need not be a square number
+ parameter WIDTH = 32, // Width of each port's tdata
+ parameter NPORTS = 10, // Number of external ports
+ parameter TOPOLOGY = "TORUS", // Passed through to axis_ctrl_crossbar_2d_mesh
+ parameter INGRESS_BUFF_SIZE = 5, // Passed through to axis_ctrl_crossbar_2d_mesh
+ parameter ROUTER_BUFF_SIZE = 5, // Passed through to axis_ctrl_crossbar_2d_mesh
+ parameter ROUTING_ALLOC = "WORMHOLE", // Passed through to axis_ctrl_crossbar_2d_mesh
+ parameter SWITCH_ALLOC = "PRIO", // Passed through to axis_ctrl_crossbar_2d_mesh
+ parameter DEADLOCK_TIMEOUT = 16384 // Passed through to axis_ctrl_crossbar_2d_mesh
+) (
+ input wire clk,
+ input wire reset,
+ // Inputs (flattened: port p uses tdata[p*WIDTH +: WIDTH] and bit p of tlast/tvalid/tready)
+ input wire [(NPORTS*WIDTH)-1:0] s_axis_tdata,
+ input wire [NPORTS-1:0] s_axis_tlast,
+ input wire [NPORTS-1:0] s_axis_tvalid,
+ output wire [NPORTS-1:0] s_axis_tready,
+ // Output (flattened the same way as the inputs)
+ output wire [(NPORTS*WIDTH)-1:0] m_axis_tdata,
+ output wire [NPORTS-1:0] m_axis_tlast,
+ output wire [NPORTS-1:0] m_axis_tvalid,
+ input wire [NPORTS-1:0] m_axis_tready,
+ // Deadlock alert
+ output wire deadlock_detected // Driven directly by the inner crossbar
+);
+
+ function integer csqrt_max1024; // Returns ceil(sqrt(value)) for 1 <= value <= 1024
+ input integer value;
+ integer i;
+ begin
+ csqrt_max1024 = 1; // Result is 1 plus the number of i in [1,32] with i*i < value
+ for (i = 1; i <= 32; i = i + 1) // sqrt(1024) = 32
+ csqrt_max1024 = csqrt_max1024 + (i*i < value ? 1 : 0);
+ end
+ endfunction
+
+ localparam integer DIM_SIZE = csqrt_max1024(NPORTS); // Smallest grid side with DIM_SIZE*DIM_SIZE >= NPORTS
+
+ wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] i_tdata, o_tdata ; // Buses of the padded DIM_SIZE*DIM_SIZE-port crossbar
+ wire [DIM_SIZE*DIM_SIZE-1:0] i_tlast, o_tlast ;
+ wire [DIM_SIZE*DIM_SIZE-1:0] i_tvalid, o_tvalid;
+ wire [DIM_SIZE*DIM_SIZE-1:0] i_tready, o_tready;
+
+ // axis_ctrl_crossbar_2d_mesh needs to scale up in squares
+ // i.e. 4, 9, 16, 25, ... but NPORTS can be any number, so
+ // instantiate the next highest square number of ports and
+ // terminate the rest.
+ axis_ctrl_crossbar_2d_mesh #(
+ .WIDTH (WIDTH),
+ .DIM_SIZE (DIM_SIZE),
+ .TOPOLOGY (TOPOLOGY),
+ .INGRESS_BUFF_SIZE(INGRESS_BUFF_SIZE),
+ .ROUTER_BUFF_SIZE (ROUTER_BUFF_SIZE),
+ .ROUTING_ALLOC (ROUTING_ALLOC),
+ .SWITCH_ALLOC (SWITCH_ALLOC),
+ .DEADLOCK_TIMEOUT (DEADLOCK_TIMEOUT)
+ ) router_dut_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (i_tdata),
+ .s_axis_tlast (i_tlast),
+ .s_axis_tvalid (i_tvalid),
+ .s_axis_tready (i_tready),
+ .m_axis_tdata (o_tdata),
+ .m_axis_tlast (o_tlast),
+ .m_axis_tvalid (o_tvalid),
+ .m_axis_tready (o_tready),
+ .deadlock_detected(deadlock_detected)
+ );
+
+ // Connect the bottom NPORTS to the IO
+ assign i_tdata[(NPORTS*WIDTH)-1:0] = s_axis_tdata;
+ assign i_tlast[NPORTS-1:0] = s_axis_tlast;
+ assign i_tvalid[NPORTS-1:0] = s_axis_tvalid;
+ assign s_axis_tready = i_tready[NPORTS-1:0];
+
+ assign m_axis_tdata = o_tdata[(NPORTS*WIDTH)-1:0];
+ assign m_axis_tlast = o_tlast[NPORTS-1:0];
+ assign m_axis_tvalid = o_tvalid[NPORTS-1:0];
+ assign o_tready[NPORTS-1:0] = m_axis_tready;
+
+ // Terminate the rest (ports NPORTS .. DIM_SIZE*DIM_SIZE-1; none when NPORTS is a perfect square)
+ genvar i;
+ generate for (i = NPORTS; i < (DIM_SIZE*DIM_SIZE); i = i + 1) begin: ports
+ axis_port_terminator #(.DATA_W(WIDTH)) term_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (o_tdata[(i*WIDTH)+:WIDTH]), // Terminator input is the crossbar's output for port i
+ .s_axis_tlast (o_tlast[i]),
+ .s_axis_tvalid(o_tvalid[i]),
+ .s_axis_tready(o_tready[i]),
+ .m_axis_tdata (i_tdata[(i*WIDTH)+:WIDTH]), // Terminator output drives the crossbar's input for port i
+ .m_axis_tlast (i_tlast[i]),
+ .m_axis_tvalid(i_tvalid[i]),
+ .m_axis_tready(i_tready[i]),
+ .pkts_dropped () // Drop counter is left unconnected
+ );
+ end endgenerate
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v
new file mode 100644
index 000000000..fd10d6682
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v
@@ -0,0 +1,178 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: axis_ingress_vc_buff
+// Description:
+// A wrapper around a buffer to implement one or more virtual channels
+// Supports gating a packet for cut-through routing
+
+module axis_ingress_vc_buff #( // Buffers one AXI-Stream input, with one buffer per virtual channel
+ parameter WIDTH = 64, // Width of the datapath
+ parameter NUM_VCS = 2, // Number of virtual channels
+ parameter SIZE = 5, // Virtual channel buffer size
+ parameter ROUTING = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}
+ parameter DEST_W = (NUM_VCS > 1) ? $clog2(NUM_VCS) : 1 // PRIVATE
+) (
+ input wire clk,
+ input wire reset,
+ input wire [WIDTH-1:0] s_axis_tdata,
+ input wire [DEST_W-1:0] s_axis_tdest, // Connected to the demux .dest input; only used when NUM_VCS > 1
+ input wire s_axis_tlast,
+ input wire s_axis_tvalid,
+ output wire s_axis_tready,
+ output wire [WIDTH-1:0] m_axis_tdata,
+ output wire m_axis_tlast,
+ output wire m_axis_tvalid,
+ input wire m_axis_tready
+);
+
+ generate if (NUM_VCS > 1) begin
+ //----------------------------------------------------
+ // Multiple virtual channels
+ //----------------------------------------------------
+
+ wire [(WIDTH*NUM_VCS)-1:0] bufin_tdata , bufout_tdata ; // Per-VC buses: VC vc uses [(vc*WIDTH)+:WIDTH] / bit vc
+ wire [NUM_VCS-1:0] bufin_tlast , bufout_tlast ;
+ wire [NUM_VCS-1:0] bufin_tvalid, bufout_tvalid;
+ wire [NUM_VCS-1:0] bufin_tready, bufout_tready;
+
+ axi_demux #( // NOTE(review): presumably steers each packet to the VC given by .dest -- confirm against axi_demux
+ .WIDTH(WIDTH), .SIZE(NUM_VCS),
+ .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
+ ) vc_demux_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .header (/* unused */),
+ .dest (s_axis_tdest ),
+ .i_tdata (s_axis_tdata ),
+ .i_tlast (s_axis_tlast ),
+ .i_tvalid (s_axis_tvalid),
+ .i_tready (s_axis_tready),
+ .o_tdata (bufin_tdata),
+ .o_tlast (bufin_tlast),
+ .o_tvalid (bufin_tvalid),
+ .o_tready (bufin_tready)
+ );
+
+ genvar vc;
+ for (vc = 0; vc < NUM_VCS; vc = vc + 1) begin // One buffer per virtual channel
+ if (ROUTING == "WORMHOLE") begin // Any other ROUTING value selects axi_packet_gate below
+ axi_fifo #(
+ .WIDTH(WIDTH+1), .SIZE(SIZE)
+ ) buf_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata ({bufin_tlast[vc], bufin_tdata [(vc*WIDTH)+:WIDTH]}), // tlast is stored as the extra top bit of the FIFO word
+ .i_tvalid (bufin_tvalid [vc]),
+ .i_tready (bufin_tready [vc]),
+ .o_tdata ({bufout_tlast[vc], bufout_tdata [(vc*WIDTH)+:WIDTH]}),
+ .o_tvalid (bufout_tvalid[vc]),
+ .o_tready (bufout_tready[vc]),
+ .space (),
+ .occupied ()
+ );
+ end else begin
+ axi_packet_gate #( // NOTE(review): presumably holds a packet until it is complete (cut-through per the header) -- confirm
+ .WIDTH(WIDTH), .SIZE(SIZE)
+ ) buf_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (bufin_tdata[(vc*WIDTH)+:WIDTH]),
+ .i_tlast (bufin_tlast[vc]),
+ .i_tvalid (bufin_tvalid[vc]),
+ .i_tready (bufin_tready[vc]),
+ .i_terror (1'b0),
+ .o_tdata (bufout_tdata[(vc*WIDTH)+:WIDTH]),
+ .o_tlast (bufout_tlast[vc]),
+ .o_tvalid (bufout_tvalid[vc]),
+ .o_tready (bufout_tready[vc])
+ );
+ end
+ end
+
+ axi_mux #( // Merges the per-VC buffer outputs back into the single master port
+ .WIDTH(WIDTH), .SIZE(NUM_VCS),
+ .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+ ) vc_mux_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (bufout_tdata ),
+ .i_tlast (bufout_tlast ),
+ .i_tvalid (bufout_tvalid),
+ .i_tready (bufout_tready),
+ .o_tdata (m_axis_tdata ),
+ .o_tlast (m_axis_tlast ),
+ .o_tvalid (m_axis_tvalid),
+ .o_tready (m_axis_tready)
+ );
+
+ end else begin
+ //----------------------------------------------------
+ // Single virtual channel (s_axis_tdest is not used in this branch)
+ //----------------------------------------------------
+ wire [WIDTH-1:0] pipe_tdata; // Stream between the main buffer and the output FIFO
+ wire pipe_tlast;
+ wire pipe_tvalid;
+ wire pipe_tready;
+
+ if (ROUTING == "WORMHOLE") begin // Any other ROUTING value selects axi_packet_gate below
+ axi_fifo #(
+ .WIDTH(WIDTH+1), .SIZE(SIZE)
+ ) buf_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata ({s_axis_tlast, s_axis_tdata}), // tlast is stored as the extra top bit of the FIFO word
+ .i_tvalid (s_axis_tvalid ),
+ .i_tready (s_axis_tready ),
+ .o_tdata ({pipe_tlast, pipe_tdata}),
+ .o_tvalid (pipe_tvalid),
+ .o_tready (pipe_tready),
+ .space (),
+ .occupied ()
+ );
+ end else begin
+ axi_packet_gate #(
+ .WIDTH(WIDTH), .SIZE(SIZE)
+ ) buf_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (s_axis_tdata),
+ .i_tlast (s_axis_tlast),
+ .i_tvalid (s_axis_tvalid),
+ .i_tready (s_axis_tready),
+ .i_terror (1'b0),
+ .o_tdata (pipe_tdata),
+ .o_tlast (pipe_tlast),
+ .o_tvalid (pipe_tvalid),
+ .o_tready (pipe_tready)
+ );
+ end
+
+ axi_fifo #( // Additional SIZE=1 FIFO stage between the main buffer and the master port
+ .WIDTH(WIDTH+1), .SIZE(1)
+ ) buf_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata ({pipe_tlast, pipe_tdata}),
+ .i_tvalid (pipe_tvalid ),
+ .i_tready (pipe_tready ),
+ .o_tdata ({m_axis_tlast, m_axis_tdata}),
+ .o_tvalid (m_axis_tvalid),
+ .o_tready (m_axis_tready),
+ .space (),
+ .occupied ()
+ );
+
+ end endgenerate
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v
new file mode 100644
index 000000000..bf9fa24be
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v
@@ -0,0 +1,44 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: axis_port_terminator
+// Description:
+// A dummy terminator for unused crossbar ports
+
+module axis_port_terminator #( // Sinks all input traffic, sources none, and counts the packets it swallows
+ parameter DATA_W = 64 // Width of tdata on both ports
+) (
+ // Clocks and resets
+ input wire clk,
+ input wire reset, // Synchronous; clears pkts_dropped
+ // Input ports
+ input wire [DATA_W-1:0] s_axis_tdata, // Input data (not used inside this module)
+ input wire s_axis_tlast, // Input EOP (last)
+ input wire s_axis_tvalid, // Input valid
+ output wire s_axis_tready, // Input ready
+ // Output ports
+ output wire [DATA_W-1:0] m_axis_tdata, // Output data
+ output wire m_axis_tlast, // Output EOP (last)
+ output wire m_axis_tvalid, // Output valid
+ input wire m_axis_tready, // Output ready
+ // Metrics
+ output reg [15:0] pkts_dropped // Number of accepted words with tlast set; 16 bits wide
+);
+
+ assign s_axis_tready = 1'b1; // Always accept incoming words
+ assign m_axis_tdata = {DATA_W{1'b0}};
+ assign m_axis_tlast = 1'b0;
+ assign m_axis_tvalid = 1'b0; // Never drives a valid word on the master port
+
+ always @(posedge clk) begin
+ if (reset) begin
+ pkts_dropped <= 'd0;
+ end else if (s_axis_tvalid & s_axis_tlast & s_axis_tready) begin // Last word of a packet was accepted
+ pkts_dropped <= pkts_dropped + 'd1;
+ end
+ end
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v
new file mode 100644
index 000000000..24b9e4129
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v
@@ -0,0 +1,164 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: axis_switch
+// Description:
+// Implementation of a M-input, N-output AXI-Stream switch.
+// One of the M input ports is allocated based on the s_axis_alloc signal
+// and the packet on that port is sent to one of the N output ports based
+// on the tdest signal
+
+module axis_switch #( // Forwards one allocated input port at a time to the output port selected by tdest
+ parameter DATA_W = 64, // tdata width
+ parameter DEST_W = 1, // Output tdest width
+ parameter IN_PORTS = 3, // Number of input ports
+ parameter OUT_PORTS = 3, // Number of output ports
+ parameter PIPELINE = 1, // Instantiate output pipeline stage?
+ parameter ALLOC_W = (IN_PORTS == 1) ? 1 : $clog2(IN_PORTS) //PRIVATE
+) (
+ // Clocks and resets
+ input wire clk, // Switch clock
+ input wire reset, // Reset
+ // Input ports
+ input wire [(DATA_W*IN_PORTS)-1:0] s_axis_tdata, // Input data
+ input wire [((DEST_W+$clog2(OUT_PORTS))*IN_PORTS)-1:0] s_axis_tdest, // Input destination: per port {output tdest, output port index}
+ input wire [IN_PORTS-1:0] s_axis_tlast, // Input EOP (last)
+ input wire [IN_PORTS-1:0] s_axis_tvalid, // Input valid
+ output wire [IN_PORTS-1:0] s_axis_tready, // Input ready
+ input wire [ALLOC_W-1:0] s_axis_alloc, // Input port allocation for switch
+ // Output ports
+ output wire [(DATA_W*OUT_PORTS)-1:0] m_axis_tdata, // Output data
+ output wire [(DEST_W*OUT_PORTS)-1:0] m_axis_tdest, // Output destination
+ output wire [OUT_PORTS-1:0] m_axis_tlast, // Output EOP (last)
+ output wire [OUT_PORTS-1:0] m_axis_tvalid, // Output valid
+ input wire [OUT_PORTS-1:0] m_axis_tready // Output ready
+);
+ // PRIVATE: Vivado synthesizer workaround (cannot be localparam)
+ localparam CLOG2_IN_PORTS = $clog2(IN_PORTS); // Not referenced elsewhere in this module
+ localparam CLOG2_OUT_PORTS = $clog2(OUT_PORTS); // Number of low tdest bits that select the output port
+
+ //---------------------------------------------------------
+ // Flatten/unflatten and pipeline
+ //---------------------------------------------------------
+ wire [DATA_W-1:0] i_tdata [0:IN_PORTS-1];
+ wire [DEST_W+$clog2(OUT_PORTS)-1:0] i_tdest [0:IN_PORTS-1];
+ wire i_tlast [0:IN_PORTS-1];
+ wire [IN_PORTS-1:0] i_tvalid;
+ wire [IN_PORTS-1:0] i_tready;
+ wire [ALLOC_W-1:0] i_alloc;
+ wire [DATA_W-1:0] o_tdata [0:OUT_PORTS-1];
+ wire [DEST_W-1:0] o_tdest [0:OUT_PORTS-1];
+ wire o_tlast [0:OUT_PORTS-1];
+ wire [OUT_PORTS-1:0] o_tvalid;
+ wire [OUT_PORTS-1:0] o_tready;
+
+ genvar i, o;
+ generate
+ for (i = 0; i < IN_PORTS; i = i + 1) begin: in_ports // Slice the flattened slave buses into per-port signals
+ assign i_tdata [i] = s_axis_tdata [(i*DATA_W)+:DATA_W];
+ assign i_tdest [i] = s_axis_tdest [(i*(DEST_W+CLOG2_OUT_PORTS))+:(DEST_W+CLOG2_OUT_PORTS)];
+ assign i_tlast [i] = s_axis_tlast [i];
+ assign i_tvalid [i] = s_axis_tvalid[i];
+ assign s_axis_tready[i] = i_tready [i];
+ end
+ assign i_alloc = s_axis_alloc; //i_alloc has to be delay matched to valid
+
+ for (o = 0; o < OUT_PORTS; o = o + 1) begin // Per output: optional register stage, otherwise a direct connection
+ if (PIPELINE == 1) begin
+ axi_fifo_flop2 #(.WIDTH(DEST_W+1+DATA_W)) out_pipe_i (
+ .clk(clk), .reset(reset), .clear(1'b0),
+ .i_tdata({o_tdest[o], o_tlast[o], o_tdata[o]}),
+ .i_tvalid(o_tvalid[o]), .i_tready(o_tready[o]),
+ .o_tdata({m_axis_tdest[(o*DEST_W)+:DEST_W], m_axis_tlast[o], m_axis_tdata[(o*DATA_W)+:DATA_W]}),
+ .o_tvalid(m_axis_tvalid[o]), .o_tready(m_axis_tready[o]),
+ .space(), .occupied()
+ );
+ end else begin
+ assign m_axis_tdata [(o*DATA_W)+:DATA_W] = o_tdata [o];
+ assign m_axis_tdest [(o*DEST_W)+:DEST_W] = o_tdest [o];
+ assign m_axis_tlast [o] = o_tlast [o];
+ assign m_axis_tvalid[o] = o_tvalid [o];
+ assign o_tready [o] = m_axis_tready[o];
+ end
+ end
+ endgenerate
+
+ //---------------------------------------------------------
+ // Allocator
+ //---------------------------------------------------------
+ // The "chosen" input port will drive this bus
+ wire [DATA_W-1:0] master_tdata;
+ wire [DEST_W+$clog2(OUT_PORTS)-1:0] master_tdest;
+ wire master_tlast;
+ wire master_tvalid;
+ wire master_tready;
+
+ generate if (IN_PORTS > 1) begin
+ reg [IN_PORTS-1:0] ialloc_oh; // One-hot grant; all zeros means no input port is allocated
+ reg [$clog2(IN_PORTS)-1:0] alloc_reg; // Index of the granted port; not cleared by reset
+ always @(posedge clk) begin
+ if (reset) begin
+ ialloc_oh <= {IN_PORTS{1'b0}};
+ end else begin
+ if (ialloc_oh == {IN_PORTS{1'b0}}) begin
+ if (|i_tvalid) begin // Grant port i_alloc when any input is valid (i_tvalid[i_alloc] itself is not checked here)
+ ialloc_oh[i_alloc] <= 1'b1;
+ alloc_reg <= i_alloc;
+ end
+ end else begin
+ if(master_tready & master_tvalid & master_tlast) // Release the grant once the last word is transferred
+ ialloc_oh <= {IN_PORTS{1'b0}};
+ end
+ end
+ end
+
+ assign master_tdata = i_tdata[alloc_reg];
+ assign master_tdest = i_tdest[alloc_reg];
+ assign master_tlast = i_tlast[alloc_reg];
+ assign master_tvalid = |(i_tvalid & ialloc_oh); // Valid only from the granted port
+ assign i_tready = i_tvalid & ialloc_oh & {IN_PORTS{master_tready}}; // Only the granted port can see ready
+ end else begin
+ // Special case: One input port
+ assign master_tdata = i_tdata[0];
+ assign master_tdest = i_tdest[0];
+ assign master_tlast = i_tlast[0];
+ assign master_tvalid = i_tvalid[0];
+ assign i_tready[0] = master_tready;
+ end endgenerate
+
+ //---------------------------------------------------------
+ // Router
+ //---------------------------------------------------------
+ generate if (OUT_PORTS > 1) begin
+ reg [OUT_PORTS-1:0] odst_oh; // One-hot selected output; all zeros means no output is selected
+ always @(posedge clk) begin
+ if (reset) begin
+ odst_oh <= {OUT_PORTS{1'b0}};
+ end else begin
+ if (odst_oh == {OUT_PORTS{1'b0}}) begin
+ if (master_tvalid) // Latch the output port from the low CLOG2_OUT_PORTS bits of tdest
+ odst_oh[master_tdest[CLOG2_OUT_PORTS-1:0]] <= 1'b1;
+ end else begin
+ if(master_tready & master_tvalid & master_tlast) // Release the output once the last word is transferred
+ odst_oh <= {OUT_PORTS{1'b0}};
+ end
+ end
+ end
+ assign master_tready = |(o_tready & odst_oh); // Ready comes only from the selected output
+ assign o_tvalid = {OUT_PORTS{master_tvalid}} & odst_oh; // Valid goes only to the selected output
+ end else begin
+ // Special case: One output port
+ assign master_tready = o_tready[0];
+ assign o_tvalid[0] = master_tvalid;
+ end endgenerate
+
+ generate for (o = 0; o < OUT_PORTS; o = o + 1) begin // Data, tdest and tlast are broadcast; o_tvalid picks the receiver
+ assign o_tdata[o] = master_tdata;
+ assign o_tdest[o] = master_tdest[DEST_W+CLOG2_OUT_PORTS-1:CLOG2_OUT_PORTS]; // Upper DEST_W bits are forwarded as the output tdest
+ assign o_tlast[o] = master_tlast;
+ end endgenerate
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v
new file mode 100644
index 000000000..79f1a6626
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v
@@ -0,0 +1,381 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: chdr_crossbar_nxn
+// Description:
+// This module implements a full-bandwidth NxN crossbar with N input and output ports
+// for CHDR traffic. It supports multiple optimization strategies for performance,
+// area and timing tradeoffs. It uses AXI-Stream for all of its links. The crossbar
+// has a dynamic routing table based on a Content Addressable Memory (CAM). The SID
+// is used to determine the destination of a packet and the routing table contains
+// a re-programmable SID to crossbar port mapping. The table is programmed using
+// special route config packets on the data input ports or using an optional
+// management port.
+// The topology, routing algorithms and the router architecture are
+// described in README.pdf in this directory.
+// Parameters:
+// - CHDR_W: Width of the AXI-Stream data bus
+// - NPORTS: Number of ports to instantiate
+// - DEFAULT_PORT: The failsafe port to forward a packet to if the SID mapping is missing
+// - MTU: log2 of max packet size (in words)
+// - ROUTE_TBL_SIZE: log2 of the number of mappings that the routing table can hold
+// at any time. Mapping values are maintained in a FIFO fashion.
+// - MUX_ALLOC: Algorithm to allocate the egress MUX
+// * PRIO: Priority based. Lower port numbers have a higher priority
+// * ROUND-ROBIN: Round robin input port allocation
+// - OPTIMIZE: Optimization strategy for performance vs area vs timing tradeoffs
+// * AREA: Attempt to minimize area at the cost of performance (throughput) and/or timing
+// * PERFORMANCE: Attempt to maximize performance at the cost of area and/or timing
+// * TIMING: Attempt to maximize Fmax at the cost of area and/or performance
+// - NPORTS_MGMT: Number of ports with management endpoint. The first NPORTS_MGMT ports will
+// have the management port instantiated
+// - EXT_RTCFG_PORT: Enable a side-channel management port (strobe/address/data/ack)
+// to configure the routing table
+// Signals:
+// - s_axis_*: Slave port for router (flattened)
+// - m_axis_*: Master port for router (flattened)
+// - ext_rtcfg_*: Side-channel routing table configuration port (see EXT_RTCFG_PORT)
+// - device_id: The ID of the device that has instantiated this module
+//
+
+module chdr_crossbar_nxn #(
+ parameter [15:0] PROTOVER = {8'd1, 8'd0}, // Forwarded to each chdr_mgmt_pkt_handler
+ parameter CHDR_W = 64,
+ parameter [7:0] NPORTS = 8,
+ parameter [7:0] DEFAULT_PORT = 0,
+ parameter MTU = 9,
+ parameter ROUTE_TBL_SIZE = 6,
+ parameter MUX_ALLOC = "ROUND-ROBIN", // "PRIO" selects priority; any other value selects round-robin
+ parameter OPTIMIZE = "AREA", // "PERFORMANCE" uses axis_switch muxes; otherwise axi_mux ("TIMING" adds a pre-FIFO)
+ parameter [7:0] NPORTS_MGMT = NPORTS,
+ parameter [0:0] EXT_RTCFG_PORT = 0
+) (
+ input wire clk,
+ input wire reset,
+ // Device info
+ input wire [15:0] device_id,
+ // Inputs (port n occupies bits [(n*CHDR_W)+:CHDR_W] of tdata and bit n of the other signals)
+ input wire [(CHDR_W*NPORTS)-1:0] s_axis_tdata,
+ input wire [NPORTS-1:0] s_axis_tlast,
+ input wire [NPORTS-1:0] s_axis_tvalid,
+ output wire [NPORTS-1:0] s_axis_tready,
+ // Output (same per-port packing as the inputs)
+ output wire [(CHDR_W*NPORTS)-1:0] m_axis_tdata,
+ output wire [NPORTS-1:0] m_axis_tlast,
+ output wire [NPORTS-1:0] m_axis_tvalid,
+ input wire [NPORTS-1:0] m_axis_tready,
+ // Router config management port (connected to the ext_* port of the routing table)
+ input wire ext_rtcfg_stb,
+ input wire [15:0] ext_rtcfg_addr,
+ input wire [31:0] ext_rtcfg_data,
+ output wire ext_rtcfg_ack
+);
+ // ---------------------------------------------------
+ // RFNoC Includes
+ // ---------------------------------------------------
+ `include "../core/rfnoc_chdr_utils.vh"
+ `include "../core/rfnoc_chdr_internal_utils.vh"
+
+ localparam NPORTS_W = $clog2(NPORTS); // Width of a port index
+ localparam EPID_W = 16; // Width of one routing table lookup key
+ localparam [17:0] EXT_INFO = {1'b0, EXT_RTCFG_PORT, NPORTS_MGMT, NPORTS}; // Passed to chdr_mgmt_build_node_info
+
+ localparam [0:0] PKT_ST_HEAD = 1'b0; // Next accepted word is the first word of a packet
+ localparam [0:0] PKT_ST_BODY = 1'b1; // A packet is in flight
+
+ // The compute_mux_alloc function is the switch allocation function for the MUX
+ // i.e. it chooses which input port reserves the output MUX for packet transfer.
+ function [NPORTS_W-1:0] compute_mux_alloc;
+ input [NPORTS-1:0] pkt_waiting; // One bit per input port: that port has a word pending
+ input [NPORTS_W-1:0] last_alloc; // Previously allocated port; returned when nothing is waiting
+ reg signed [NPORTS_W:0] i; // Signed and one bit wider than a port index so that i >= 0 can become false
+ begin
+ compute_mux_alloc = last_alloc;
+ for (i = NPORTS-1; i >= 0; i=i-1) begin // Descending scan: the last match (smallest i) wins
+ if (MUX_ALLOC == "PRIO") begin
+ // Priority. Lower port index gets a higher priority.
+ if (pkt_waiting[i])
+ compute_mux_alloc = i;
+ end else begin
+ // Round-robin. The search starts at the port after last_alloc and wraps around.
+ if (pkt_waiting[(last_alloc + i + 1) % NPORTS])
+ compute_mux_alloc = (last_alloc + i + 1) % NPORTS;
+ end
+ end
+ end
+ endfunction
+
+ wire [NPORTS-1:0] rtcfg_req_wr;
+ wire [(16*NPORTS)-1:0] rtcfg_req_addr;
+ wire [(32*NPORTS)-1:0] rtcfg_req_data;
+ wire [NPORTS-1:0] rtcfg_resp_ack;
+ wire [(EPID_W*NPORTS)-1:0] find_tdata;
+ wire [NPORTS-1:0] find_tvalid;
+ wire [NPORTS-1:0] find_tready;
+ wire [(NPORTS_W*NPORTS)-1:0] result_tdata;
+ wire [NPORTS-1:0] result_tkeep;
+ wire [NPORTS-1:0] result_tvalid;
+ wire [NPORTS-1:0] result_tready;
+
+ // Instantiate a single CAM-based routing table that will be shared between all
+ // input ports. Configuration and lookup is performed using an AXI-Stream iface.
+ // If multiple packets arrive simultaneously, only the headers of those packets will
+ // be serialized in order to arbitrate this map. Selection is done round-robin.
+ chdr_xb_routing_table #(
+ .SIZE(ROUTE_TBL_SIZE), .NPORTS(NPORTS),
+ .EXT_INS_PORT_EN(EXT_RTCFG_PORT)
+ ) routing_tbl_i (
+ .clk (clk ),
+ .reset (reset ),
+ .port_req_wr (rtcfg_req_wr ),
+ .port_req_addr (rtcfg_req_addr),
+ .port_req_data (rtcfg_req_data),
+ .port_resp_ack (rtcfg_resp_ack),
+ .ext_req_wr (ext_rtcfg_stb ),
+ .ext_req_addr (ext_rtcfg_addr),
+ .ext_req_data (ext_rtcfg_data),
+ .ext_resp_ack (ext_rtcfg_ack ),
+ .axis_find_tdata (find_tdata ),
+ .axis_find_tvalid (find_tvalid ),
+ .axis_find_tready (find_tready ),
+ .axis_result_tdata (result_tdata ),
+ .axis_result_tkeep (result_tkeep ),
+ .axis_result_tvalid(result_tvalid ),
+ .axis_result_tready(result_tready )
+ );
+
+ // Per-port streams, in datapath order: i_* (after the management handler),
+ // buf_* (after the ingress buffer), swi_* (after the pipeline FIFO),
+ // swo_* (demux outputs) and muxi_* (egress mux inputs).
+ wire [CHDR_W-1:0] i_tdata [0:NPORTS-1];
+ wire [9:0] i_tdest [0:NPORTS-1];
+ wire [1:0] i_tid [0:NPORTS-1];
+ wire i_tlast [0:NPORTS-1];
+ wire i_tvalid [0:NPORTS-1];
+ wire i_tready [0:NPORTS-1];
+ wire [CHDR_W-1:0] buf_tdata [0:NPORTS-1];
+ wire [NPORTS_W-1:0] buf_tdest [0:NPORTS-1], buf_tdest_tmp[0:NPORTS-1];
+ wire buf_tkeep [0:NPORTS-1];
+ wire buf_tlast [0:NPORTS-1];
+ wire buf_tvalid[0:NPORTS-1];
+ wire buf_tready[0:NPORTS-1];
+ wire [CHDR_W-1:0] swi_tdata [0:NPORTS-1];
+ wire [NPORTS_W-1:0] swi_tdest [0:NPORTS-1];
+ wire swi_tlast [0:NPORTS-1];
+ wire swi_tvalid[0:NPORTS-1];
+ wire swi_tready[0:NPORTS-1];
+ wire [(CHDR_W*NPORTS)-1:0] swo_tdata [0:NPORTS-1], muxi_tdata [0:NPORTS-1];
+ wire [NPORTS-1:0] swo_tlast [0:NPORTS-1], muxi_tlast [0:NPORTS-1];
+ wire [NPORTS-1:0] swo_tvalid[0:NPORTS-1], muxi_tvalid[0:NPORTS-1];
+ wire [NPORTS-1:0] swo_tready[0:NPORTS-1], muxi_tready[0:NPORTS-1];
+
+ genvar n, i, j;
+ generate
+ for (n = 0; n < NPORTS; n = n + 1) begin: i_ports
+ // For each input port, first check if we have a management packet
+ // arriving. If it arrives, the top config commands are extracted, sent to the
+ // routing table for configuration, and the rest of the packet is forwarded
+ // down to the router. Only the first NPORTS_MGMT ports get a management
+ // handler; the remaining ports are wired straight through to the ingress buffer.
+ if (n < NPORTS_MGMT) begin
+ chdr_mgmt_pkt_handler #(
+ .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .MGMT_ONLY(0)
+ ) mgmt_ep_i (
+ .clk (clk ),
+ .rst (reset ),
+ .node_info (chdr_mgmt_build_node_info(EXT_INFO, n, NODE_TYPE_XBAR, device_id)),
+ .s_axis_chdr_tdata (s_axis_tdata [(n*CHDR_W)+:CHDR_W] ),
+ .s_axis_chdr_tlast (s_axis_tlast [n] ),
+ .s_axis_chdr_tvalid (s_axis_tvalid[n] ),
+ .s_axis_chdr_tready (s_axis_tready[n] ),
+ .s_axis_chdr_tuser ('d0 ),
+ .m_axis_chdr_tdata (i_tdata [n] ),
+ .m_axis_chdr_tdest (i_tdest [n] ),
+ .m_axis_chdr_tid (i_tid [n] ),
+ .m_axis_chdr_tlast (i_tlast [n] ),
+ .m_axis_chdr_tvalid (i_tvalid [n] ),
+ .m_axis_chdr_tready (i_tready [n] ),
+ .ctrlport_req_wr (rtcfg_req_wr [n] ),
+ .ctrlport_req_rd (/* unused */ ),
+ .ctrlport_req_addr (rtcfg_req_addr[(n*16)+:16] ),
+ .ctrlport_req_data (rtcfg_req_data[(n*32)+:32] ),
+ .ctrlport_resp_ack (rtcfg_resp_ack[n] ),
+ .ctrlport_resp_data (32'h0 /* unused */ ),
+ .op_stb (/* unused */ ),
+ .op_dst_epid (/* unused */ ),
+ .op_src_epid (/* unused */ ),
+ .op_data (/* unused */ )
+ );
+ end else begin
+ assign i_tdata [n] = s_axis_tdata [(n*CHDR_W)+:CHDR_W];
+ assign i_tid [n] = CHDR_MGMT_ROUTE_EPID; // Ingress buffer will always do a routing table lookup
+ assign i_tdest [n] = 10'd0; // Unused
+ assign i_tlast [n] = s_axis_tlast [n];
+ assign i_tvalid [n] = s_axis_tvalid[n];
+ assign s_axis_tready[n] = i_tready [n];
+
+ assign rtcfg_req_wr [n] = 1'b0; // This port never issues routing table writes
+ assign rtcfg_req_addr[(n*16)+:16] = 16'h0;
+ assign rtcfg_req_data[(n*32)+:32] = 32'h0;
+ end
+
+ // Ingress buffer module that does the following:
+ // - Stores and gates an incoming packet
+ // - Looks up destination in routing table and attaches a tdest for the packet
+ chdr_xb_ingress_buff #(
+ .WIDTH(CHDR_W), .MTU(MTU), .DEST_W(NPORTS_W), .NODE_ID(n)
+ ) buf_i (
+ .clk (clk ),
+ .reset (reset ),
+ .s_axis_chdr_tdata (i_tdata [n] ),
+ .s_axis_chdr_tdest (i_tdest [n][NPORTS_W-1:0] ),
+ .s_axis_chdr_tid (i_tid [n] ),
+ .s_axis_chdr_tlast (i_tlast [n] ),
+ .s_axis_chdr_tvalid (i_tvalid [n] ),
+ .s_axis_chdr_tready (i_tready [n] ),
+ .m_axis_chdr_tdata (buf_tdata [n] ),
+ .m_axis_chdr_tdest (buf_tdest_tmp[n] ),
+ .m_axis_chdr_tkeep (buf_tkeep [n] ),
+ .m_axis_chdr_tlast (buf_tlast [n] ),
+ .m_axis_chdr_tvalid (buf_tvalid [n] ),
+ .m_axis_chdr_tready (buf_tready [n] ),
+ .m_axis_find_tdata (find_tdata [(n*EPID_W)+:EPID_W] ),
+ .m_axis_find_tvalid (find_tvalid [n] ),
+ .m_axis_find_tready (find_tready [n] ),
+ .s_axis_result_tdata (result_tdata [(n*NPORTS_W)+:NPORTS_W]),
+ .s_axis_result_tkeep (result_tkeep [n] ),
+ .s_axis_result_tvalid(result_tvalid[n] ),
+ .s_axis_result_tready(result_tready[n] )
+ );
+ assign buf_tdest[n] = buf_tkeep[n] ? buf_tdest_tmp[n] : DEFAULT_PORT[NPORTS_W-1:0]; // tkeep low -> failsafe DEFAULT_PORT
+
+ // Pipeline stage (axi_fifo, SIZE=1) between the ingress buffer and the demux
+ axi_fifo #(
+ .WIDTH(CHDR_W+1+NPORTS_W), .SIZE(1)
+ ) pipe_i (
+ .clk (clk ),
+ .reset (reset ),
+ .clear (1'b0 ),
+ .i_tdata ({buf_tlast[n], buf_tdest[n], buf_tdata[n]}),
+ .i_tvalid (buf_tvalid[n] ),
+ .i_tready (buf_tready[n] ),
+ .o_tdata ({swi_tlast[n], swi_tdest[n], swi_tdata[n]}),
+ .o_tvalid (swi_tvalid[n] ),
+ .o_tready (swi_tready[n] ),
+ .space (/* Unused */ ),
+ .occupied (/* Unused */ )
+ );
+
+ // Ingress demux. Use the tdest field to determine packet destination
+ axis_switch #(
+ .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(1), .OUT_PORTS(NPORTS), .PIPELINE(1)
+ ) demux_i (
+ .clk (clk ),
+ .reset (reset ),
+ .s_axis_tdata (swi_tdata [n] ),
+ .s_axis_tdest ({1'b0, swi_tdest [n]}),
+ .s_axis_tlast (swi_tlast [n] ),
+ .s_axis_tvalid (swi_tvalid[n] ),
+ .s_axis_tready (swi_tready[n] ),
+ .s_axis_alloc (1'b0 ),
+ .m_axis_tdata (swo_tdata [n] ),
+ .m_axis_tdest (/* Unused */ ),
+ .m_axis_tlast (swo_tlast [n] ),
+ .m_axis_tvalid (swo_tvalid[n] ),
+ .m_axis_tready (swo_tready[n] )
+ );
+ end
+
+ for (i = 0; i < NPORTS; i = i + 1) begin // Transpose: slot i of demux j feeds slot j of mux i
+ for (j = 0; j < NPORTS; j = j + 1) begin
+ assign muxi_tdata [i][j*CHDR_W+:CHDR_W] = swo_tdata [j][i*CHDR_W+:CHDR_W];
+ assign muxi_tlast [i][j] = swo_tlast [j][i];
+ assign muxi_tvalid[i][j] = swo_tvalid [j][i];
+ assign swo_tready [i][j] = muxi_tready[j][i]; // Same transpose; here i indexes the demux and j the mux
+ end
+ end
+
+ for (n = 0; n < NPORTS; n = n + 1) begin: o_ports
+ if (OPTIMIZE == "PERFORMANCE") begin
+ // Use the axis_switch module when optimizing for performance
+ // This logic has some extra levels of logic to ensure
+ // that the switch allocation happens in 0 clock cycles which
+ // means that Fmax for this implementation will be lower.
+
+ wire mux_ready = |muxi_tready[n]; // Max 1 bit should be high
+ wire mux_valid = |muxi_tvalid[n];
+ wire mux_last = |(muxi_tvalid[n] & muxi_tlast[n]);
+
+ // Track the input packet state
+ reg [0:0] pkt_state = PKT_ST_HEAD;
+ always @(posedge clk) begin
+ if (reset) begin
+ pkt_state <= PKT_ST_HEAD;
+ end else if (mux_valid & mux_ready) begin
+ pkt_state <= mux_last ? PKT_ST_HEAD : PKT_ST_BODY;
+ end
+ end
+
+ // The switch requires the allocation to stay valid until the
+ // end of the packet. We also might need to keep the previous
+ // packet's allocation to compute the current one
+ reg [NPORTS_W-1:0] prev_sw_alloc = {NPORTS_W{1'b0}};
+ reg [NPORTS_W-1:0] pkt_sw_alloc = {NPORTS_W{1'b0}};
+ wire [NPORTS_W-1:0] muxi_sw_alloc = (mux_valid && pkt_state == PKT_ST_HEAD) ?
+ compute_mux_alloc(muxi_tvalid[n], prev_sw_alloc) : pkt_sw_alloc;
+
+ always @(posedge clk) begin
+ if (reset) begin
+ prev_sw_alloc <= {NPORTS_W{1'b0}};
+ pkt_sw_alloc <= {NPORTS_W{1'b0}};
+ end else if (mux_valid & mux_ready) begin
+ if (pkt_state == PKT_ST_HEAD)
+ pkt_sw_alloc <= muxi_sw_alloc; // Hold the allocation for the rest of this packet
+ if (mux_last)
+ prev_sw_alloc <= muxi_sw_alloc; // Becomes last_alloc for the next packet's arbitration
+ end
+ end
+
+ axis_switch #(
+ .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(NPORTS), .OUT_PORTS(1),
+ .PIPELINE(0)
+ ) mux_i (
+ .clk (clk ),
+ .reset (reset ),
+ .s_axis_tdata (muxi_tdata [n] ),
+ .s_axis_tdest ({NPORTS{1'b0}} /* Unused */ ),
+ .s_axis_tlast (muxi_tlast [n] ),
+ .s_axis_tvalid (muxi_tvalid[n] ),
+ .s_axis_tready (muxi_tready[n] ),
+ .s_axis_alloc (muxi_sw_alloc ),
+ .m_axis_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]),
+ .m_axis_tdest (/* Unused */ ),
+ .m_axis_tlast (m_axis_tlast [n] ),
+ .m_axis_tvalid (m_axis_tvalid[n] ),
+ .m_axis_tready (m_axis_tready[n] )
+ );
+ end else begin
+ // axi_mux has an additional bubble cycle but the logic
+ // to allocate an input port has fewer levels and takes
+ // up fewer resources.
+ axi_mux #(
+ .PRIO(MUX_ALLOC == "PRIO"), .WIDTH(CHDR_W), .SIZE(NPORTS),
+ .PRE_FIFO_SIZE(OPTIMIZE == "TIMING" ? 1 : 0), .POST_FIFO_SIZE(1)
+ ) mux_i (
+ .clk (clk ),
+ .reset (reset ),
+ .clear (1'b0 ),
+ .i_tdata (muxi_tdata [n] ),
+ .i_tlast (muxi_tlast [n] ),
+ .i_tvalid (muxi_tvalid [n] ),
+ .i_tready (muxi_tready [n] ),
+ .o_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]),
+ .o_tlast (m_axis_tlast [n] ),
+ .o_tvalid (m_axis_tvalid[n] ),
+ .o_tready (m_axis_tready[n] )
+ );
+ end
+ end
+ endgenerate
+
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v
new file mode 100644
index 000000000..dcb11da8e
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v
@@ -0,0 +1,259 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: chdr_xb_ingress_buff
+//
+// Description:
+//
+// Ingress buffer module for the CHDR crossbar. This module stores and gates
+// the incoming packet and simultaneously determines the destination (TDEST)
+// by inspecting the incoming TID. If the TID is CHDR_MGMT_ROUTE_EPID then we
+// perform a lookup on the packet's destination EPID to determine TDEST.
+//
+// Parameters:
+//
+// WIDTH : Data width of the CHDR interfaces (TDATA)
+// MTU : Maximum transmission unit, in WIDTH-sized words, is 2**MTU
+// DEST_W : Width of the destination routing information (TDEST)
+// NODE_ID : Numeric identifier for this port
+//
+
+module chdr_xb_ingress_buff #(
+ parameter WIDTH = 64,
+ parameter MTU = 5,
+ parameter DEST_W = 4,
+ parameter [9:0] NODE_ID = 0
+) (
+ input wire clk,
+ input wire reset,
+ // CHDR input port
+ input wire [WIDTH-1:0] s_axis_chdr_tdata,
+ input wire [DEST_W-1:0] s_axis_chdr_tdest,
+ input wire [1:0] s_axis_chdr_tid,
+ input wire s_axis_chdr_tlast,
+ input wire s_axis_chdr_tvalid,
+ output wire s_axis_chdr_tready,
+ // CHDR output port (with a tdest and tkeep)
+ output wire [WIDTH-1:0] m_axis_chdr_tdata,
+ output wire [DEST_W-1:0] m_axis_chdr_tdest,
+ output wire m_axis_chdr_tkeep,
+ output wire m_axis_chdr_tlast,
+ output wire m_axis_chdr_tvalid,
+ input wire m_axis_chdr_tready,
+ // Find port going to routing table
+ output wire [15:0] m_axis_find_tdata,
+ output wire m_axis_find_tvalid,
+ input wire m_axis_find_tready,
+ // Result port from routing table
+ input wire [DEST_W-1:0] s_axis_result_tdata,
+ input wire s_axis_result_tkeep,
+ input wire s_axis_result_tvalid,
+ output wire s_axis_result_tready
+);
+
+ // RFNoC Includes
+ `include "../core/rfnoc_chdr_utils.vh"
+ `include "../core/rfnoc_chdr_internal_utils.vh"
+
+
+ //---------------------------------------------------------------------------
+ // Packet Buffer
+ //---------------------------------------------------------------------------
+
+ wire [WIDTH-1:0] gate_i_tdata , gate_o_tdata ;
+ wire gate_i_tlast , gate_o_tlast ;
+ wire gate_i_tvalid, gate_o_tvalid;
+ wire gate_i_tready, gate_o_tready;
+
+ // The axi_packet_gate queues up an entire packet before letting it go out.
+ // This reduces congestion in the crossbar for slowly-built packets.
+ axi_packet_gate #(
+ .WIDTH (WIDTH),
+ .SIZE (MTU)
+ ) axi_packet_gate_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (gate_i_tdata),
+ .i_tlast (gate_i_tlast),
+ .i_terror (1'b0),
+ .i_tvalid (gate_i_tvalid),
+ .i_tready (gate_i_tready),
+ .o_tdata (gate_o_tdata),
+ .o_tlast (gate_o_tlast),
+ .o_tvalid (gate_o_tvalid),
+ .o_tready (gate_o_tready)
+ );
+
+
+ //---------------------------------------------------------------------------
+ // Destination (TDEST) Muxing
+ //---------------------------------------------------------------------------
+
+ wire [15:0] find_tdata;
+ wire find_tvalid, find_tready;
+
+ wire [DEST_W-1:0] dest_i_tdata;
+ wire dest_i_tkeep, dest_i_tvalid, dest_i_tready;
+ wire [DEST_W-1:0] dest_o_tdata;
+ wire dest_o_tkeep, dest_o_tvalid, dest_o_tready;
+
+ // The find_fifo holds the lookup requests from the find_* AXI stream and
+ // sends them on to the m_axis_find_* stream port. It is required because the
+ // input logic (see below) doesn't obey the AXI handshake protocol but this
+ // FIFO can tolerate it.
+ axi_fifo #(
+ .WIDTH (16),
+ .SIZE (1)
+ ) find_fifo_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (find_tdata),
+ .i_tvalid (find_tvalid),
+ .i_tready (find_tready),
+ .o_tdata (m_axis_find_tdata),
+ .o_tvalid (m_axis_find_tvalid),
+ .o_tready (m_axis_find_tready),
+ .space (),
+ .occupied ()
+ );
+
+ // The destination (TDEST) can come from two sources: Directly from the
+ // packet info (in which case TDEST was immediately determined and comes in
+ // on dest_* AXI stream) or via a lookup (in which case the result comes in
+ // on s_axis_result_*). Only one of these data paths is used at a time, so we
+ // mux them together here to create a single stream (dest_o_*) that contains
+ // the destination for the next packet.
+ axi_mux #(
+ .WIDTH (DEST_W+1),
+ .SIZE (2),
+ .PRIO (1),
+ .PRE_FIFO_SIZE (1),
+ .POST_FIFO_SIZE (1)
+ ) dest_mux_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata ({dest_i_tkeep, dest_i_tdata, // Upper slot: directly determined destination
+ s_axis_result_tkeep, s_axis_result_tdata}), // Lower slot: routing table result
+ .i_tlast (2'b11), // Every destination word is a complete one-word packet
+ .i_tvalid ({dest_i_tvalid, s_axis_result_tvalid}),
+ .i_tready ({dest_i_tready, s_axis_result_tready}),
+ .o_tdata ({dest_o_tkeep, dest_o_tdata}),
+ .o_tlast (),
+ .o_tvalid (dest_o_tvalid),
+ .o_tready (dest_o_tready)
+ );
+
+
+ //---------------------------------------------------------------------------
+ // Input Logic
+ //---------------------------------------------------------------------------
+ //
+ // When a packet comes in, we may have to do one of the following:
+ // 1) Lookup the TDEST using the EPID
+ // 2) Use the specified input TDEST
+ // 3) Use the NODE_ID as the TDEST (to return the packet)
+ //
+ //---------------------------------------------------------------------------
+
+ // The s_axis_chdr_hdr_valid signal indicates when TDATA and TID contain the
+ // header information for the current packet.
+ reg s_axis_chdr_hdr_valid = 1'b1;
+
+ always @(posedge clk) begin
+ if (reset) begin
+ s_axis_chdr_hdr_valid <= 1'b1;
+ end else if (s_axis_chdr_tvalid & s_axis_chdr_tready) begin
+ s_axis_chdr_hdr_valid <= s_axis_chdr_tlast; // The word after TLAST is the next packet's header
+ end
+ end
+
+ // The dest_find_tready signal indicates if the find_fifo is ready or if the
+ // dest port of the dest_mux is ready, depending on which path will be used.
+ reg dest_find_tready;
+
+ always @(*) begin
+ if (s_axis_chdr_hdr_valid) begin
+ case (s_axis_chdr_tid)
+ CHDR_MGMT_ROUTE_EPID:
+ dest_find_tready = find_tready;
+ CHDR_MGMT_ROUTE_TDEST:
+ dest_find_tready = dest_i_tready;
+ CHDR_MGMT_RETURN_TO_SRC:
+ dest_find_tready = dest_i_tready;
+ default:
+ dest_find_tready = dest_i_tready; // We should never get here
+ endcase
+ end else begin
+ dest_find_tready = 1'b1; // Non-header words never enter the dest/find path
+ end
+ end
+
+ // We can accept a transfer from the input CHDR stream only if the packet
+ // gate and dest/find datapaths are ready.
+ assign s_axis_chdr_tready = s_axis_chdr_tvalid &&
+ gate_i_tready &&
+ dest_find_tready;
+
+ // The chdr_header_stb signal indicates when we write data into the dest/find
+ // data path. This happens when we're accepting the header word of the packet
+ // into the packet gate.
+ wire chdr_header_stb = s_axis_chdr_tvalid &&
+ s_axis_chdr_tready &&
+ s_axis_chdr_hdr_valid;
+
+ // **************************************************************************
+ // WARNING: The logic below violates AXI-Stream by having a tready -> tvalid
+ // dependency. To ensure no deadlocks, we must place FIFOs downstream
+ // of gate_i_*, find_* and dest_i_*
+
+ // Here we decide if we need to do a lookup using the find_* path or if the
+ // destination is known and can be put directly on the dest_* path.
+ //
+ // Start a lookup request if the TID is CHDR_MGMT_ROUTE_EPID.
+ assign find_tdata = chdr_get_dst_epid(s_axis_chdr_tdata[63:0]);
+ assign find_tvalid = chdr_header_stb &&
+ (s_axis_chdr_tid == CHDR_MGMT_ROUTE_EPID);
+ // Set TDEST directly if TID is CHDR_MGMT_ROUTE_TDEST or
+ // CHDR_MGMT_RETURN_TO_SRC.
+ assign dest_i_tdata = (s_axis_chdr_tid == CHDR_MGMT_ROUTE_TDEST) ?
+ s_axis_chdr_tdest : NODE_ID[DEST_W-1:0];
+ assign dest_i_tkeep = 1'b1; // A directly determined destination is always marked as kept
+ assign dest_i_tvalid = chdr_header_stb &&
+ (s_axis_chdr_tid != CHDR_MGMT_ROUTE_EPID);
+
+ // Input logic for axi_packet_gate
+ assign gate_i_tdata = s_axis_chdr_tdata;
+ assign gate_i_tlast = s_axis_chdr_tlast;
+ assign gate_i_tvalid = s_axis_chdr_tready && s_axis_chdr_tvalid;
+
+ //
+ // **************************************************************************
+
+
+ //---------------------------------------------------------------------------
+ // Output Logic
+ //---------------------------------------------------------------------------
+ //
+ // The destination for the packet (TDEST) must be valid before we allow the
+ // header of the packet to pass through. So the packet must be blocked until
+ // the output of the dest_o_* is valid. TDEST and TKEEP must remain valid
+ // until the end of the packet.
+ //
+ //---------------------------------------------------------------------------
+
+ assign m_axis_chdr_tdata = gate_o_tdata;
+ assign m_axis_chdr_tlast = gate_o_tlast;
+ assign m_axis_chdr_tdest = dest_o_tdata;
+ assign m_axis_chdr_tkeep = dest_o_tkeep;
+ assign m_axis_chdr_tvalid = gate_o_tvalid && dest_o_tvalid;
+
+ assign gate_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready;
+ assign dest_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready && m_axis_chdr_tlast; // Pop the destination only with the packet's last word
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v
new file mode 100644
index 000000000..f445efc68
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v
@@ -0,0 +1,122 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: chdr_xb_routing_table
+// Description:
+// A routing table for the CHDR crossbar. This table is designed
+// to be shared between all ports. It has an AXI-Stream lookup
+// interface and a ctrlport (reduced) configuration interface.
+
+module chdr_xb_routing_table #(
+ parameter SIZE = 6,
+ parameter NPORTS = 4,
+ parameter EXT_INS_PORT_EN = 1
+) (
+ // Clocks and resets
+ input wire clk,
+ input wire reset,
+ // Insertion Interface (for XB ports); port n uses bit n / bits [n*16+:16] / bits [n*32+:32]
+ input wire [NPORTS-1:0] port_req_wr,
+ input wire [(16*NPORTS)-1:0] port_req_addr,
+ input wire [(32*NPORTS)-1:0] port_req_data,
+ output wire [NPORTS-1:0] port_resp_ack,
+ // Insertion Interface (External); only connected when EXT_INS_PORT_EN adds a config port
+ input wire ext_req_wr,
+ input wire [15:0] ext_req_addr,
+ input wire [31:0] ext_req_data,
+ output wire ext_resp_ack,
+ // Find Interface
+ input wire [(16*NPORTS)-1:0] axis_find_tdata,
+ input wire [NPORTS-1:0] axis_find_tvalid,
+ output wire [NPORTS-1:0] axis_find_tready,
+ // Result Interface (for Find)
+ output wire [($clog2(NPORTS)*NPORTS)-1:0] axis_result_tdata,
+ output wire [NPORTS-1:0] axis_result_tkeep,
+ output wire [NPORTS-1:0] axis_result_tvalid,
+ input wire [NPORTS-1:0] axis_result_tready
+);
+ localparam NPORTS_W = $clog2(NPORTS); // Width of a port index (the stored value)
+ localparam CFG_W = NPORTS_W + 16; // One config word: {port index, 16-bit address}
+ localparam CFG_PORTS = NPORTS + EXT_INS_PORT_EN; // Number of inputs on the config mux
+
+ // CAM-based lookup table
+
+ wire [15:0] insert_tdest; // Key to insert (request address)
+ wire [NPORTS_W-1:0] insert_tdata; // Value to insert (low NPORTS_W bits of request data)
+ wire insert_tvalid;
+ wire insert_tready;
+
+ axis_muxed_kv_map #(
+ .KEY_WIDTH(16), .VAL_WIDTH(NPORTS_W),
+ .SIZE(SIZE), .NUM_PORTS(NPORTS)
+ ) kv_map_i (
+ .clk (clk ),
+ .reset (reset ),
+ .axis_insert_tdata (insert_tdata ),
+ .axis_insert_tdest (insert_tdest ),
+ .axis_insert_tvalid(insert_tvalid ),
+ .axis_insert_tready(insert_tready ),
+ .axis_find_tdata (axis_find_tdata ),
+ .axis_find_tvalid (axis_find_tvalid ),
+ .axis_find_tready (axis_find_tready ),
+ .axis_result_tdata (axis_result_tdata ),
+ .axis_result_tkeep (axis_result_tkeep ),
+ .axis_result_tvalid(axis_result_tvalid),
+ .axis_result_tready(axis_result_tready)
+ );
+
+ // Logic to convert from ctrlport to AXI-Stream (one single-word holding register per config port)
+
+ wire ins_req_wr [0:CFG_PORTS-1];
+ wire [15:0] ins_req_addr[0:CFG_PORTS-1];
+ wire [NPORTS_W-1:0] ins_req_data[0:CFG_PORTS-1];
+ wire ins_resp_ack[0:CFG_PORTS-1];
+
+ reg [(CFG_PORTS*CFG_W)-1:0] cfg_tdata;
+ reg [CFG_PORTS-1:0] cfg_tvalid = {CFG_PORTS{1'b0}};
+ wire [CFG_PORTS-1:0] cfg_tready;
+
+ genvar i;
+ generate for (i = 0; i < CFG_PORTS; i=i+1) begin
+ assign ins_req_wr [i] = (i < NPORTS) ? port_req_wr[i] : ext_req_wr; // Index NPORTS (if present) is the external port
+ assign ins_req_addr[i] = (i < NPORTS) ? port_req_addr[i*16 +: 16] : ext_req_addr;
+ assign ins_req_data[i] = (i < NPORTS) ? port_req_data[i*32 +: NPORTS_W] : ext_req_data[NPORTS_W-1:0];
+ if (i < NPORTS)
+ assign port_resp_ack[i] = ins_resp_ack[i];
+ else
+ assign ext_resp_ack = ins_resp_ack[i]; // NOTE(review): ext_resp_ack has no driver when EXT_INS_PORT_EN == 0
+
+ always @(posedge clk) begin
+ if (reset) begin
+ cfg_tvalid[i] <= 1'b0;
+ end else begin
+ if (~cfg_tvalid[i]) begin // Idle: capture a new write request
+ if (ins_req_wr[i]) begin
+ cfg_tvalid[i] <= 1'b1;
+ cfg_tdata[(CFG_W*i) +: CFG_W] <= {ins_req_data[i], ins_req_addr[i]};
+ end
+ end else begin // Busy: hold the word until the mux accepts it
+ cfg_tvalid[i] <= ~cfg_tready[i];
+ end
+ end
+ end
+ assign ins_resp_ack[i] = cfg_tvalid[i] & cfg_tready[i]; // Ack in the cycle the mux accepts the word
+ end endgenerate
+
+ // Multiplexer between XB ports and external cfg. i_tlast must be CFG_PORTS wide (one bit per mux input), not CFG_W.
+
+ axi_mux #(
+ .WIDTH(CFG_W), .SIZE(CFG_PORTS),
+ .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+ ) rtcfg_mux_i (
+ .clk(clk), .reset(reset), .clear(1'b0),
+ .i_tdata(cfg_tdata), .i_tlast({CFG_PORTS{1'b1}}), // Every config word is a complete one-word packet
+ .i_tvalid(cfg_tvalid), .i_tready(cfg_tready),
+ .o_tdata({insert_tdata, insert_tdest}), .o_tlast(),
+ .o_tvalid(insert_tvalid), .o_tready(insert_tready)
+ );
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile
new file mode 100644
index 000000000..7fa7ae03b
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile
@@ -0,0 +1,52 @@
+#
+# Copyright 2015 Ettus Research LLC
+#
+
+#-------------------------------------------------
+# Top-of-Makefile
+#-------------------------------------------------
+# Define BASE_DIR to point to the "top" dir (fpga/usrp3/top, four levels above this directory)
+BASE_DIR = $(abspath ../../../../top)
+# Include viv_sim_preamble after defining BASE_DIR
+include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak
+
+#-------------------------------------------------
+# Design Specific
+#-------------------------------------------------
+# Define part using PART_ID (<device>/<package>/<speedgrade>)
+ARCH = kintex7
+PART_ID = xc7k410t/ffg900/-2
+
+# Include makefiles and sources for the DUT and its dependencies (control, fifo, rfnoc/crossbar, rfnoc/core)
+include $(BASE_DIR)/../lib/control/Makefile.srcs
+include $(BASE_DIR)/../lib/fifo/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs
+
+DESIGN_SRCS = $(abspath \
+$(FIFO_SRCS) \
+$(CONTROL_LIB_SRCS) \
+$(RFNOC_XBAR_SRCS) \
+$(RFNOC_CORE_SRCS) \
+)
+
+#-------------------------------------------------
+# Testbench Specific
+#-------------------------------------------------
+# Define only one toplevel module. TB_TOP_MODULE may be overridden (?=); with the default, crossbar_tb.sv is listed twice below.
+TB_TOP_MODULE ?= crossbar_tb
+SIM_TOP = $(TB_TOP_MODULE)
+
+SIM_SRCS = \
+$(abspath chdr_traffic_source_sim.sv) \
+$(abspath chdr_traffic_sink_sim.sv) \
+$(abspath crossbar_tb.sv) \
+$(abspath $(TB_TOP_MODULE).sv)
+
+#-------------------------------------------------
+# Bottom-of-Makefile
+#-------------------------------------------------
+# Include all simulator specific makefiles here
+# Each should define a unique target to simulate
+# e.g. xsim, vsim, etc and a common "clean" target
+include $(BASE_DIR)/../tools/make/viv_simulator.mak
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile
new file mode 100644
index 000000000..0f1a10a6e
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile
@@ -0,0 +1,51 @@
+#
+# Copyright 2015 Ettus Research LLC
+#
+
+#-------------------------------------------------
+# Top-of-Makefile
+#-------------------------------------------------
+# Define BASE_DIR to point to the "top" dir (fpga/usrp3/top, five levels above this directory)
+BASE_DIR = $(abspath ../../../../../top)
+# Include viv_sim_preamble after defining BASE_DIR
+include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak
+
+#-------------------------------------------------
+# Design Specific
+#-------------------------------------------------
+# Define part using PART_ID (<device>/<package>/<speedgrade>)
+ARCH = kintex7
+PART_ID = xc7k410t/ffg900/-2
+
+# Include makefiles and sources for the DUT and its dependencies (control, fifo, rfnoc/crossbar, rfnoc/core)
+include $(BASE_DIR)/../lib/control/Makefile.srcs
+include $(BASE_DIR)/../lib/fifo/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs
+
+DESIGN_SRCS = $(abspath \
+$(FIFO_SRCS) \
+$(CONTROL_LIB_SRCS) \
+$(RFNOC_XBAR_SRCS) \
+$(RFNOC_CORE_SRCS) \
+)
+
+#-------------------------------------------------
+# Testbench Specific
+#-------------------------------------------------
+# Define only one toplevel module (the wrapper module declared in axis_ctrl_crossbar_nxn_tb.sv)
+SIM_TOP = axis_ctrl_crossbar_nxn_tb
+
+SIM_SRCS = \
+$(abspath axis_ctrl_crossbar_nxn_tb.sv) \
+$(abspath ../crossbar_tb.sv) \
+$(abspath ../chdr_traffic_source_sim.sv) \
+$(abspath ../chdr_traffic_sink_sim.sv)
+
+#-------------------------------------------------
+# Bottom-of-Makefile
+#-------------------------------------------------
+# Include all simulator specific makefiles here
+# Each should define a unique target to simulate
+# e.g. xsim, vsim, etc and a common "clean" target
+include $(BASE_DIR)/../tools/make/viv_simulator.mak
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv
new file mode 100644
index 000000000..fa112f5cb
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv
@@ -0,0 +1,26 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+`timescale 1ns/1ps
+
+module axis_ctrl_crossbar_nxn_tb;
+ // Thin wrapper with no ports: binds the shared crossbar_tb harness to one
+ // fixed configuration so that it can be used as a simulation top level.
+ crossbar_tb #(
+ .TEST_NAME("axis_ctrl_crossbar_nxn_tb"),
+ .ROUTER_IMPL("axis_ctrl_2d_torus"), // Router implementation under test
+ .ROUTER_PORTS(20), // Port count of the router
+ .ROUTER_DWIDTH(64), // Width of the router datapath
+ .MTU_LOG2(5), // Max router packet size, as a log2
+ .NUM_MASTERS(4), // Data generators used by the test
+ .TEST_MAX_PACKETS(100), // Packets streamed per test case
+ .TEST_LPP(20), // Lines in each packet
+ .TEST_MIN_INJ_RATE(10), // Lowest injection rate tested
+ .TEST_MAX_INJ_RATE(40), // Highest injection rate tested
+ .TEST_INJ_RATE_INCR(10), // Step between tested injection rates
+ .TEST_GEN_LL_FILES(0) // Write files for load-latency graphs?
+ ) impl ();
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile
new file mode 100644
index 000000000..399515640
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile
@@ -0,0 +1,51 @@
+#
+# Copyright 2015 Ettus Research LLC
+#
+
+#-------------------------------------------------
+# Top-of-Makefile
+#-------------------------------------------------
+# BASE_DIR must point to the "top" dir; every shared makefile below is located relative to it
+BASE_DIR = $(abspath ../../../../../top)
+# Include viv_sim_preamble only after BASE_DIR has been defined (its path depends on BASE_DIR)
+include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak
+
+#-------------------------------------------------
+# Design Specific
+#-------------------------------------------------
+# Target part, given as PART_ID in the form <device>/<package>/<speedgrade>
+ARCH = kintex7
+PART_ID = xc7k410t/ffg900/-2
+
+# Source lists for the DUT and its dependencies; DESIGN_SRCS below expands the *_SRCS variables named there
+include $(BASE_DIR)/../lib/control/Makefile.srcs
+include $(BASE_DIR)/../lib/fifo/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs
+include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs
+
+DESIGN_SRCS = $(abspath \
+$(FIFO_SRCS) \
+$(CONTROL_LIB_SRCS) \
+$(RFNOC_XBAR_SRCS) \
+$(RFNOC_CORE_SRCS) \
+)
+
+#-------------------------------------------------
+# Testbench Specific
+#-------------------------------------------------
+# Exactly one toplevel module: the module declared in chdr_crossbar_nxn_tb.sv
+SIM_TOP = chdr_crossbar_nxn_tb
+
+SIM_SRCS = \
+$(abspath chdr_crossbar_nxn_tb.sv) \
+$(abspath ../crossbar_tb.sv) \
+$(abspath ../chdr_traffic_source_sim.sv) \
+$(abspath ../chdr_traffic_sink_sim.sv)
+
+#-------------------------------------------------
+# Bottom-of-Makefile
+#-------------------------------------------------
+# Include all simulator specific makefiles here
+# Each should define a unique target to simulate
+# e.g. xsim, vsim, etc and a common "clean" target
+include $(BASE_DIR)/../tools/make/viv_simulator.mak
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv
new file mode 100644
index 000000000..1c5cace63
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/chdr_crossbar_nxn_tb.sv
@@ -0,0 +1,26 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+`timescale 1ns/1ps
+
+module chdr_crossbar_nxn_tb;
+  // Parameter-only wrapper: the generic crossbar_tb bench is instantiated once
+  // with the settings below. This module has no ports and no logic of its own.
+  crossbar_tb #(
+    // ---- Test identification ----
+    .TEST_NAME          ("chdr_crossbar_nxn_tb"),
+    // ---- Router configuration ----
+    .ROUTER_IMPL        ("chdr_crossbar_nxn"),   // Router implementation
+    .ROUTER_PORTS       (10),                    // Number of ports
+    .ROUTER_DWIDTH      (64),                    // Router datapath width
+    .MTU_LOG2           (7),                     // log2 of max packet size for router
+    // ---- Traffic configuration ----
+    .NUM_MASTERS        (10),                    // Number of data generators in test
+    .TEST_MAX_PACKETS   (100),                   // Packets streamed per test case
+    .TEST_LPP           (100),                   // Lines per packet
+    // ---- Injection-rate sweep (percent) ----
+    .TEST_MIN_INJ_RATE  (60),                    // First injection rate tested
+    .TEST_MAX_INJ_RATE  (100),                   // Last injection rate tested
+    .TEST_INJ_RATE_INCR (10),                    // Step between injection rates
+    // ---- Reporting ----
+    .TEST_GEN_LL_FILES  (0)                      // 1 = write files for load-latency graphs
+  ) impl ();
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv
new file mode 100644
index 000000000..a9fe3ba27
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_sink_sim.sv
@@ -0,0 +1,150 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: chdr_traffic_sink_sim
+// Description:
+// A sink for CHDR traffic. Simulation only.
+// Accepts packets and computes the following metrics:
+// - Data integrity errors
+// - Packet latency
+// - Throughput counts
+// All metrics can optionally be written to a file to
+// generate load-latency graphs.
+
+`timescale 1ns/1ps
+
+`include "sim_cvita_lib.svh"
+
+module chdr_traffic_sink_sim #(
+ parameter WIDTH = 64, // Width of the AXI-Stream data bus (also passed as DWIDTH to the cvita_slave BFM)
+ parameter MTU = 5, // Only used to size IDLE_TIMEOUT below
+ parameter [15:0] NODE_ID = 'd0, // Expected dst_sid of every received packet; also part of the CSV file name
+ parameter [15:0] NUM_NODES = 'd16, // Not referenced inside this module
+ parameter FILE_PATH = ".", // Directory for the per-session CSV file; "" disables file output
+ parameter FLUSH_N = 4 // Extra log2 margin added to the idle timeout
+) (
+ // Clocks and resets
+ input clk, // All waits in the consume loop are on posedge clk
+ input rst, // Not referenced inside this module
+ // Settings
+ input [63:0] current_time, // Timebase value; latency is current_time minus the packet timestamp
+ input start_stb, // Starts a new session when seen high
+ input [7:0] injection_rate, // Only used to build the CSV file name
+ input [15:0] lines_per_pkt, // Expected packet size; payload must hold lines_per_pkt-2 words
+ input [7:0] traffic_patt, // Only used (printed as a character) in the CSV file name
+ // CHDR slave interface (packets arrive here; this module drives only tready)
+ input [WIDTH-1:0] s_axis_tdata,
+ input s_axis_tlast,
+ input s_axis_tvalid,
+ output s_axis_tready,
+ // Metrics
+ output session_active, // High from session start until the idle timeout ends the session
+ output [31:0] xfer_count, // Incremented by lines_per_pkt for every packet pulled
+ output [31:0] pkt_count, // Number of packets pulled in the current session
+ output [31:0] data_err_count, // Packets with a size or payload mismatch
+ output [31:0] route_err_count // Packets whose dst_sid differs from NODE_ID
+);
+
+ // Error flags summed into err and written to the "Error" column of the CSV
+ localparam integer ERR_BIT_PKT_SIZE_MISMATCH = 1;
+ localparam integer ERR_BIT_PKT_DATA_MISMATCH = 2;
+ localparam integer ERR_BIT_PKT_DEST_MISMATCH = 4;
+ localparam integer ERR_BIT_PKT_SEQUENCE_ERR = 8; // Declared but never raised in this module
+
+ cvita_slave #(.DWIDTH(WIDTH)) s_chdr (.clk(clk)); // Slave BFM; presumably declared in sim_cvita_lib.svh
+ cvita_pkt_t pkt; // Most recently pulled packet
+
+ assign s_chdr.axis.tdata = s_axis_tdata;
+ assign s_chdr.axis.tlast = s_axis_tlast;
+ assign s_chdr.axis.tvalid = s_axis_tvalid;
+ assign s_axis_tready = s_chdr.axis.tready;
+
+ logic running = 0;
+ integer num_data_errs = 0;
+ integer num_route_errs = 0;
+ logic [31:0] num_pkts_xferd = 0;
+ logic [31:0] num_samps_xferd = 0;
+
+ assign data_err_count = num_data_errs;
+ assign route_err_count = num_route_errs;
+ assign xfer_count = num_samps_xferd;
+ assign pkt_count = num_pkts_xferd;
+ assign session_active = running;
+
+ integer session = 0; // Session number, incremented at every session start
+ string filename; // CSV path for the current session
+ integer handle = 0; // File handle; stays 0 when FILE_PATH is ""
+ integer err = 0; // Error flags of the packet being validated
+ integer bus_idle_cnt = 0; // Consecutive clk cycles seen with tvalid low
+ logic [WIDTH-1:0] i; // Payload index, also the expected payload word value
+
+ // Egress buff in source is MTU + 4
+ localparam integer IDLE_TIMEOUT = (1 << (MTU + 4 + FLUSH_N)); // Session ends once bus_idle_cnt exceeds this
+
+ initial begin: consume_blk
+ // Consume infinitely
+ s_chdr.reset();
+ while (1) begin
+ // A session begins once start_stb is seen high (re-checked at each posedge of clk)
+ while (~start_stb) @(posedge clk);
+ session = session + 1;
+ $sformat(filename, "%s/pkts_node%05d_inj%03d_lpp%05d_traffic%c_sess%04d.csv",
+ FILE_PATH, NODE_ID, injection_rate, lines_per_pkt, traffic_patt, session);
+ if (FILE_PATH != "") begin
+ handle = $fopen(filename, "w");
+ if (handle == 0) begin
+ $error("Could not open file: %s", filename);
+ $finish();
+ end
+ end
+ if (handle != 0) $fdisplay(handle, "Src,Dst,Seqno,Error,Latency");
+ s_chdr.reset();
+ num_data_errs = 0;
+ num_route_errs = 0;
+ num_pkts_xferd = 0;
+ num_samps_xferd = 0;
+ bus_idle_cnt = 0;
+ running = 1;
+ while (1) begin
+ // Idle handling: count cycles without tvalid and end the session after the timeout
+ err = 0;
+ if (~s_chdr.axis.tvalid[0]) begin
+ @(posedge clk);
+ bus_idle_cnt = bus_idle_cnt + 1;
+ if (bus_idle_cnt <= IDLE_TIMEOUT)
+ continue;
+ else
+ break;
+ end
+ s_chdr.pull_pkt(pkt, 0); // Pull packet from bus
+ bus_idle_cnt = 0;
+ num_pkts_xferd = num_pkts_xferd + 1;
+ num_samps_xferd = num_samps_xferd + lines_per_pkt;
+ // Validate packet: destination first, then size, then payload contents
+ if (pkt.hdr.dst_sid != NODE_ID) begin
+ err = err + ERR_BIT_PKT_DEST_MISMATCH;
+ num_route_errs = num_route_errs + 1;
+ end
+ if (pkt.payload.size() != lines_per_pkt-2) begin
+ err = err + ERR_BIT_PKT_SIZE_MISMATCH;
+ num_data_errs = num_data_errs + 1;
+ end else begin
+ for (i = 'd0; i < (lines_per_pkt-2); i=i+1) begin
+ if (pkt.payload[i] != i) begin // Payload word i is expected to equal i
+ err = err + ERR_BIT_PKT_DATA_MISMATCH;
+ num_data_errs = num_data_errs + 1;
+ break; // At most one data error is counted per packet
+ end
+ end
+ end
+ if (handle != 0) $fdisplay(handle, "%00d,%00d,%00d,%00d,%00d",
+ pkt.hdr.src_sid, pkt.hdr.dst_sid, pkt.hdr.seqnum, err, (current_time - pkt.hdr.timestamp));
+ end
+ running = 0;
+ if (handle != 0) $fclose(handle);
+ end
+ end
+
+endmodule \ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv
new file mode 100644
index 000000000..8c3d974c9
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_traffic_source_sim.sv
@@ -0,0 +1,202 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: chdr_traffic_source_sim
+// Description:
+// A traffic generator for CHDR traffic. Simulation only.
+// Supports multiple traffic patterns and injection rates.
+//
+
+`timescale 1ns/1ps
+
+`include "sim_cvita_lib.svh"
+
+module chdr_traffic_source_sim #(
+  parameter        WIDTH     = 64,    // Width of the AXI-Stream data bus
+  parameter        MTU       = 5,     // log2 of the max number of lines in a packet
+  parameter [15:0] NODE_ID   = 'd0,   // Node ID for this generator
+  parameter [15:0] NUM_NODES = 'd16   // Total number of generators in the application
+) (
+  // Clocks and resets
+  input              clk,               // AXI-Stream clock
+  input              rst,               // AXI-Stream reset
+  // Settings
+  input  [63:0]      current_time,      // The current value of the global timebase (synch to clk)
+  input              start_stb,         // A strobe that indicates the start of a generation session
+  input  [7:0]       injection_rate,    // The inject rate (in percent) to simulate
+  input  [15:0]      lines_per_pkt,     // Number of lines per packet to generate
+  input  [7:0]       traffic_patt,      // The traffic pattern (see localparams below for values)
+  input  [31:0]      num_pkts_to_send,  // Number of packets to send
+  // CHDR master interface
+  output [WIDTH-1:0] m_axis_tdata,      // AXI-Stream master tdata
+  output             m_axis_tlast,      // AXI-Stream master tlast
+  output             m_axis_tvalid,     // AXI-Stream master tvalid
+  input              m_axis_tready,     // AXI-Stream master tready
+  // Metrics
+  output             session_active,    // Signal indicating if generation session is active
+  output [63:0]      session_duration,  // Session duration (only valid after session ends)
+  output [31:0]      xfer_count,        // Number of lines transferred (only valid after session ends)
+  output [31:0]      pkt_count          // Number of packets transferred (only valid after session ends)
+);
+  // **** Supported Traffic Patterns ****
+  // Each value is the ASCII code of the letter noted on the right.
+  localparam [7:0] TRAFFIC_PATT_LOOPBACK       = 8'd76; //L
+  localparam [7:0] TRAFFIC_PATT_NEIGHBOR       = 8'd78; //N
+  localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C
+  localparam [7:0] TRAFFIC_PATT_SEQUENTIAL     = 8'd83; //S
+  localparam [7:0] TRAFFIC_PATT_UNIFORM        = 8'd85; //U
+  localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O
+  localparam [7:0] TRAFFIC_PATT_RANDOM_PERM    = 8'd82; //R
+
+  cvita_master #(.DWIDTH(WIDTH)) m_chdr (.clk(clk));
+  axis_t #(.DWIDTH(WIDTH)) post_fifo (.clk(clk));
+  axis_t #(.DWIDTH(WIDTH)) pre_gate (.clk(clk));
+  cvita_hdr_t header;
+  reg throttle = 1'b1;
+
+  logic        running = 0;
+  logic [31:0] curr_pkt_num = 'd0;
+  logic [31:0] num_samps_xferd = 'd0;
+  logic [63:0] start_time = 0;
+  logic [63:0] stop_time = 0;
+  logic [15:0] last_gen_sid = (NODE_ID - 16'd1);
+
+  assign xfer_count = num_samps_xferd;
+  assign pkt_count = curr_pkt_num;
+  assign session_duration = (stop_time - start_time);
+  assign session_active = running;
+
+  // Utility function to assign SIDs based on traffic pattern.
+  //   traffic_patt : one of the TRAFFIC_PATT_* codes above
+  //   last_sid     : destination used for the previous packet (SEQUENTIAL only)
+  // Returns the destination SID for the next packet. RANDOM_PERM is not
+  // implemented and, like any unknown pattern, yields destination 0.
+  function [15:0] gen_dst_sid;
+    input [7:0]  traffic_patt;
+    input [15:0] last_sid;
+    // Deliberately declared without an initializer. This function has static
+    // lifetime, so an initializer on the declaration would be evaluated once
+    // at the start of simulation instead of on every call, and the random
+    // draw below would never be refreshed.
+    logic [31:0] rnum;
+
+    if (traffic_patt == TRAFFIC_PATT_UNIFORM) begin
+      gen_dst_sid = $urandom_range('d0, NUM_NODES-'d1);
+    end else if (traffic_patt == TRAFFIC_PATT_UNIFORM_OTHERS) begin
+      // Draw from NUM_NODES-1 values and skip over our own NODE_ID
+      rnum = $urandom_range('d0, NUM_NODES-'d2);
+      if (rnum < NODE_ID)
+        gen_dst_sid = rnum[15:0];
+      else
+        gen_dst_sid = rnum[15:0] + 16'd1;
+    end else if (traffic_patt == TRAFFIC_PATT_SEQUENTIAL) begin
+      gen_dst_sid = (last_sid + 16'd1) % NUM_NODES;
+    end else if (traffic_patt == TRAFFIC_PATT_NEIGHBOR) begin
+      gen_dst_sid = (NODE_ID + 16'd1) % NUM_NODES;
+    end else if (traffic_patt == TRAFFIC_PATT_LOOPBACK) begin
+      gen_dst_sid = NODE_ID;
+    end else if (traffic_patt == TRAFFIC_PATT_BIT_COMPLEMENT) begin
+      gen_dst_sid = (NUM_NODES - NODE_ID - 1) % NUM_NODES;
+    end else if (traffic_patt == TRAFFIC_PATT_RANDOM_PERM) begin
+      //TODO: Implement me
+      gen_dst_sid = 0;
+    end else begin
+      gen_dst_sid = 'd0;
+    end
+  endfunction
+
+  // Generation loop. Push to m_chdr infinitely fast
+  initial begin: gen_blk
+    // Generate infinitely
+    $srandom(NODE_ID + NUM_NODES);
+    m_chdr.reset();
+    while (1) begin
+      // A generation session begins once start_stb is seen high (re-checked at each posedge of clk)
+      while (~start_stb) @(posedge clk);
+      curr_pkt_num = 'd0;
+      m_chdr.reset();
+      num_samps_xferd = 'd0;
+      start_time = current_time;
+      running = 1;
+      while (curr_pkt_num < num_pkts_to_send) begin
+        header = '{
+          pkt_type:DATA, has_time:1, eob:0,
+          seqnum:curr_pkt_num[11:0], length:(lines_per_pkt*8),
+          src_sid:NODE_ID, dst_sid:gen_dst_sid(traffic_patt, last_gen_sid),
+          timestamp:0   //TS attached later
+        };
+        last_gen_sid = header.dst_sid;
+        curr_pkt_num = curr_pkt_num + 'd1;
+        // Payload is lines_per_pkt-2 words: a ramp starting at 0 with step 1
+        m_chdr.push_ramp_pkt(lines_per_pkt-2, 'h0, 'h1, header);
+        num_samps_xferd = num_samps_xferd + lines_per_pkt;
+      end
+      running = 0;
+      stop_time = current_time;
+    end
+  end
+
+  // Capture packets in a really short FIFO (for backpressure)
+  axi_fifo #(
+    .WIDTH(WIDTH+1), .SIZE(MTU + 1)
+  ) fifo_i (
+    .clk      (clk),
+    .reset    (rst),
+    .clear    (1'b0),
+    .i_tdata  ({m_chdr.axis.tlast, m_chdr.axis.tdata}),
+    .i_tvalid (m_chdr.axis.tvalid),
+    .i_tready (m_chdr.axis.tready),
+    .o_tdata  ({post_fifo.tlast, post_fifo.tdata}),
+    .o_tvalid (post_fifo.tvalid),
+    .o_tready (post_fifo.tready),
+    .space    (),
+    .occupied ()
+  );
+
+  // Attach timestamp after the packet leaves the FIFO after
+  // throttling.
+
+  // Tracks which line of the packet is currently crossing the pre_gate bus
+  localparam [1:0] ST_HDR  = 2'd0;
+  localparam [1:0] ST_TS   = 2'd1;
+  localparam [1:0] ST_BODY = 2'd2;
+
+  reg [1:0] pkt_state = ST_HDR;
+  always_ff @(posedge clk) begin
+    if (rst) begin
+      pkt_state <= ST_HDR;
+    end else if (pre_gate.tvalid & pre_gate.tready) begin
+      case (pkt_state)
+        ST_HDR:
+          // Bit 61 of the header line decides whether a timestamp line follows
+          if (~pre_gate.tlast)
+            pkt_state <= pre_gate.tdata[61] ? ST_TS : ST_BODY;
+        ST_TS:
+          pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY;
+        ST_BODY:
+          pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY;
+        default:
+          pkt_state <= ST_HDR;
+      endcase
+    end
+  end
+
+  // Enforce injection rate by pulling from FIFO with a certain time probability.
+  // The random draw is uniform in [0, 99]; throttling when it is >= injection_rate
+  // lets a line through with probability injection_rate/100 (never at 0, always
+  // at 100). A strict ">" would pass (injection_rate+1)/100 instead.
+  always_ff @(posedge clk) begin
+    throttle <= ($urandom_range(32'd99, 32'd0) >= {24'h0, injection_rate});
+  end
+
+  // Insert timestamp + throttle logic
+  assign pre_gate.tdata   = (pkt_state == ST_TS) ? current_time : post_fifo.tdata;
+  assign pre_gate.tlast   = post_fifo.tlast;
+  assign pre_gate.tvalid  = post_fifo.tvalid & ~throttle;
+  assign post_fifo.tready = pre_gate.tready & ~throttle;
+
+  // Gate the packet to smooth out throttle-related noise.
+  // This also serves as a buffer for the packet in case things are backed up
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(MTU + 4), .USE_AS_BUFF(1)
+  ) pkt_gate_i (
+    .clk      (clk),
+    .reset    (rst),
+    .clear    (1'b0),
+    .i_tdata  (pre_gate.tdata),
+    .i_tlast  (pre_gate.tlast),
+    .i_terror (1'b0),
+    .i_tvalid (pre_gate.tvalid),
+    .i_tready (pre_gate.tready),
+    .o_tdata  (m_axis_tdata),
+    .o_tlast  (m_axis_tlast),
+    .o_tvalid (m_axis_tvalid),
+    .o_tready (m_axis_tready)
+  );
+
+endmodule \ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv
new file mode 100644
index 000000000..fc9d53fe7
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/crossbar_tb.sv
@@ -0,0 +1,428 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+`timescale 1ns/1ps
+`define NS_PER_TICK 1
+`define NUM_TEST_CASES 7
+
+`include "sim_clks_rsts.vh"
+`include "sim_exec_report.vh"
+`include "sim_set_rb_lib.svh"
+`include "sim_axis_lib.svh"
+
+`define SIM_TIMEOUT_US 1000000 // Default: 1s
+
+// Generic crossbar testbench.
+// Instantiates one traffic source and one traffic sink per router port, the
+// router selected by ROUTER_IMPL, and runs an injection-rate sweep for each of
+// six traffic patterns. Per-node packet counts and error counts are printed,
+// checked with ASSERT_ERROR, and optionally written to CSV files.
+module crossbar_tb #(
+  parameter TEST_NAME           = "crossbar_tb",
+  // Router parameters
+  parameter ROUTER_IMPL         = "axi_crossbar", // Router implementation
+  parameter ROUTER_PORTS        = 10,             // # Router ports
+  parameter ROUTER_DWIDTH       = 64,             // Router datapath width
+  parameter MTU_LOG2            = 7,              // log2 of max packet size for router
+  parameter NUM_MASTERS         = ROUTER_PORTS,   // Number of data generators in test
+  // Test parameters
+  parameter TEST_MAX_PACKETS    = 50,             // How many packets to stream per test case?
+  parameter TEST_LPP            = 50,             // Lines per packet
+  parameter TEST_MIN_INJ_RATE   = 60,             // Minimum injection rate to test
+  parameter TEST_MAX_INJ_RATE   = 100,            // Maximum injection rate to test
+  parameter TEST_INJ_RATE_INCR  = 10,             // Injection rate increment
+  parameter TEST_GEN_LL_FILES   = 0               // Generate files to produce load-latency graphs?
+
+)(
+  /* no IO */
+);
+  `TEST_BENCH_INIT(TEST_NAME,`NUM_TEST_CASES,`NS_PER_TICK)
+
+  //----------------------------------------------------
+  // General test setup
+  //----------------------------------------------------
+
+  // Clocks and reset
+  `DEFINE_CLK(clk, 5.000, 50)
+  `DEFINE_RESET(rst, 0, 10)
+
+  // Timekeeper (cycle counter)
+  // Starts from 0 once rst is low and counts posedges of clk while rst stays low
+  logic [63:0] timestamp;
+  initial begin : timekeeper_blk
+    while (rst) @(posedge clk);
+    timestamp = 'd0;
+    while (~rst) begin
+      @(posedge clk);
+      timestamp = timestamp + 'd1;
+    end
+  end
+
+  //----------------------------------------------------
+  // Instantiate traffic generators, checkers, buses
+  //----------------------------------------------------
+  localparam FILE_PATH = {`WORKING_DIR, "/data/", ROUTER_IMPL};
+
+  // Data buses
+  axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) src2rtr_axis (.clk(clk));
+  axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) rtr2snk_axis (.clk(clk));
+
+  // Control buses
+  settings_bus_master #(.SR_AWIDTH(16), .SR_DWIDTH(32)) rtr_sb (.clk(clk));
+  wire rtr_sb_ack;   // Only driven (and only read) for the chdr_crossbar_nxn router
+
+  // Test vector source and sink instantiation
+  logic [7:0]   set_injection_rate;
+  logic [15:0]  set_lines_per_pkt;
+  logic [7:0]   set_traffic_patt;
+  logic [31:0]  set_num_pkts_to_send;
+  logic         snk_start_stb = 0;
+  logic         src_start_stb = 0;
+
+  wire [63:0]             session_duration   [0:ROUTER_PORTS-1];
+  wire [ROUTER_PORTS-1:0] src_active;
+  wire [31:0]             src_xfer_count     [0:ROUTER_PORTS-1];
+  wire [31:0]             src_pkt_count      [0:ROUTER_PORTS-1];
+  wire [ROUTER_PORTS-1:0] snk_active;
+  wire [31:0]             snk_xfer_count     [0:ROUTER_PORTS-1];
+  wire [31:0]             snk_pkt_count      [0:ROUTER_PORTS-1];
+  wire [31:0]             snk_data_err_count [0:ROUTER_PORTS-1];
+  wire [31:0]             snk_route_err_count[0:ROUTER_PORTS-1];
+
+  // Deadlock indication. Driven by the axis_ctrl router; tied to 0 in every
+  // other branch of the DUT generate below so the edge detectors never see X/Z.
+  wire deadlock_detected;
+  reg  deadlock_detected_del = 1'b0;
+  always @(posedge clk) deadlock_detected_del <= deadlock_detected;
+  wire deadlock_re = (deadlock_detected & ~deadlock_detected_del);
+  wire deadlock_fe = (~deadlock_detected & deadlock_detected_del);
+
+  genvar i;
+  generate for (i = 0; i < ROUTER_PORTS; i=i+1) begin: src_snk_blk
+    // Only the first NUM_MASTERS sources ever receive a start strobe
+    chdr_traffic_source_sim #(
+      .WIDTH           (ROUTER_DWIDTH),
+      .MTU             (MTU_LOG2),
+      .NODE_ID         (i),
+      .NUM_NODES       (ROUTER_PORTS)
+    ) traffic_src (
+      .clk             (clk),
+      .rst             (rst),
+      .current_time    (timestamp),
+      .start_stb       (src_start_stb & (i < NUM_MASTERS)),
+      .injection_rate  (set_injection_rate),
+      .lines_per_pkt   (set_lines_per_pkt),
+      .traffic_patt    (set_traffic_patt),
+      .num_pkts_to_send(set_num_pkts_to_send),
+      .m_axis_tdata    (src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]),
+      .m_axis_tlast    (src2rtr_axis.tlast[i]),
+      .m_axis_tvalid   (src2rtr_axis.tvalid[i]),
+      .m_axis_tready   (src2rtr_axis.tready[i]),
+      .session_active  (src_active[i]),
+      .session_duration(session_duration[i]),
+      .xfer_count      (src_xfer_count[i]),
+      .pkt_count       (src_pkt_count[i])
+    );
+
+    chdr_traffic_sink_sim #(
+      .WIDTH           (ROUTER_DWIDTH),
+      .MTU             (MTU_LOG2),
+      .NODE_ID         (i),
+      .NUM_NODES       (ROUTER_PORTS),
+      .FILE_PATH       (TEST_GEN_LL_FILES==1 ? FILE_PATH : "")
+    ) traffic_sink (
+      .clk             (clk),
+      .rst             (rst),
+      .current_time    (timestamp),
+      .start_stb       (snk_start_stb),
+      .injection_rate  (set_injection_rate),
+      .lines_per_pkt   (set_lines_per_pkt),
+      .traffic_patt    (set_traffic_patt),
+      .s_axis_tdata    (rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]),
+      .s_axis_tlast    (rtr2snk_axis.tlast[i]),
+      .s_axis_tvalid   (rtr2snk_axis.tvalid[i]),
+      .s_axis_tready   (rtr2snk_axis.tready[i]),
+      .session_active  (snk_active[i]),
+      .xfer_count      (snk_xfer_count[i]),
+      .pkt_count       (snk_pkt_count[i]),
+      .data_err_count  (snk_data_err_count[i]),
+      .route_err_count (snk_route_err_count[i])
+    );
+  end endgenerate
+
+  //----------------------------------------------------
+  // Instantiate DUT
+  //----------------------------------------------------
+  generate if (ROUTER_IMPL == "FIFO") begin
+    // No deadlock reporting in this configuration
+    assign deadlock_detected = 1'b0;
+    for (i = 0; i < ROUTER_PORTS; i=i+1) begin
+      axi_fifo #(
+        .WIDTH(ROUTER_DWIDTH+1), .SIZE(0)
+      ) fifo_i (
+        .clk      (clk),
+        .reset    (rst),
+        .clear    (1'b0),
+        .i_tdata  ({src2rtr_axis.tlast[i], src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}),
+        .i_tvalid (src2rtr_axis.tvalid[i]),
+        .i_tready (src2rtr_axis.tready[i]),
+        .o_tdata  ({rtr2snk_axis.tlast[i], rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}),
+        .o_tvalid (rtr2snk_axis.tvalid[i]),
+        .o_tready (rtr2snk_axis.tready[i]),
+        .space    (),
+        .occupied ()
+      );
+    end
+  end else if (ROUTER_IMPL == "axi_crossbar") begin
+    // No deadlock reporting in this configuration
+    assign deadlock_detected = 1'b0;
+    axi_crossbar #(
+      .BASE         (0),
+      .FIFO_WIDTH   (ROUTER_DWIDTH),
+      .DST_WIDTH    (16),
+      .NUM_INPUTS   (ROUTER_PORTS),
+      .NUM_OUTPUTS  (ROUTER_PORTS)
+    ) router_dut_i (
+      // General
+      .clk          (clk),
+      .reset        (rst),
+      .clear        (1'b0),
+      .local_addr   (8'd0),
+      // Inputs
+      .i_tdata      (src2rtr_axis.tdata),
+      .i_tlast      (src2rtr_axis.tlast),
+      .i_tvalid     (src2rtr_axis.tvalid),
+      .i_tready     (src2rtr_axis.tready),
+      .pkt_present  (src2rtr_axis.tvalid),
+      // Output
+      .o_tdata      (rtr2snk_axis.tdata),
+      .o_tlast      (rtr2snk_axis.tlast),
+      .o_tvalid     (rtr2snk_axis.tvalid),
+      .o_tready     (rtr2snk_axis.tready),
+      // Setting Bus
+      .set_stb      (rtr_sb.settings_bus.set_stb),
+      .set_addr     (rtr_sb.settings_bus.set_addr),
+      .set_data     (rtr_sb.settings_bus.set_data),
+      // Readback bus
+      .rb_rd_stb    (1'b0),
+      .rb_addr      ({(2*$clog2(ROUTER_PORTS)){1'b0}}),
+      .rb_data      ()
+    );
+  end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin
+    // No deadlock reporting in this configuration
+    assign deadlock_detected = 1'b0;
+    chdr_crossbar_nxn #(
+      .CHDR_W         (ROUTER_DWIDTH),
+      .NPORTS         (ROUTER_PORTS),
+      .DEFAULT_PORT   (0),
+      .MTU            (MTU_LOG2),
+      .ROUTE_TBL_SIZE (6),
+      .MUX_ALLOC      ("ROUND-ROBIN"),
+      .OPTIMIZE       ("AREA"),
+      .NPORTS_MGMT    (0),
+      .EXT_RTCFG_PORT (1)
+    ) router_dut_i (
+      // General
+      .clk            (clk),
+      .reset          (rst),
+      // Inputs
+      .s_axis_tdata   (src2rtr_axis.tdata),
+      .s_axis_tlast   (src2rtr_axis.tlast),
+      .s_axis_tvalid  (src2rtr_axis.tvalid),
+      .s_axis_tready  (src2rtr_axis.tready),
+      // Output
+      .m_axis_tdata   (rtr2snk_axis.tdata),
+      .m_axis_tlast   (rtr2snk_axis.tlast),
+      .m_axis_tvalid  (rtr2snk_axis.tvalid),
+      .m_axis_tready  (rtr2snk_axis.tready),
+      // External router config
+      .ext_rtcfg_stb  (rtr_sb.settings_bus.set_stb),
+      .ext_rtcfg_addr (rtr_sb.settings_bus.set_addr),
+      .ext_rtcfg_data (rtr_sb.settings_bus.set_data),
+      .ext_rtcfg_ack  (rtr_sb_ack)
+    );
+  end else begin
+    // Any other ROUTER_IMPL string selects the axis_ctrl crossbar;
+    // "axis_ctrl_2d_torus" picks the TORUS topology, everything else MESH.
+    axis_ctrl_crossbar_nxn #(
+      .WIDTH            (ROUTER_DWIDTH),
+      .NPORTS           (ROUTER_PORTS),
+      .TOPOLOGY         (ROUTER_IMPL == "axis_ctrl_2d_torus" ? "TORUS" : "MESH"),
+      .INGRESS_BUFF_SIZE(MTU_LOG2),
+      .ROUTER_BUFF_SIZE (MTU_LOG2),
+      .ROUTING_ALLOC    ("WORMHOLE"),
+      .SWITCH_ALLOC     ("PRIO")
+    ) router_dut_i (
+      // General
+      .clk              (clk),
+      .reset            (rst),
+      // Inputs
+      .s_axis_tdata     (src2rtr_axis.tdata),
+      .s_axis_tlast     (src2rtr_axis.tlast),
+      .s_axis_tvalid    (src2rtr_axis.tvalid),
+      .s_axis_tready    (src2rtr_axis.tready),
+      // Output
+      .m_axis_tdata     (rtr2snk_axis.tdata),
+      .m_axis_tlast     (rtr2snk_axis.tlast),
+      .m_axis_tvalid    (rtr2snk_axis.tvalid),
+      .m_axis_tready    (rtr2snk_axis.tready),
+      // Deadlock detection
+      .deadlock_detected(deadlock_detected)
+    );
+  end endgenerate
+
+  //----------------------------------------------------
+  // Test routine. Runs tests and writes metrics to file
+  //----------------------------------------------------
+
+  // Constants (ASCII code of the letter noted on the right)
+  localparam [7:0] TRAFFIC_PATT_LOOPBACK       = 8'd76; //L
+  localparam [7:0] TRAFFIC_PATT_NEIGHBOR       = 8'd78; //N
+  localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67; //C
+  localparam [7:0] TRAFFIC_PATT_SEQUENTIAL     = 8'd83; //S
+  localparam [7:0] TRAFFIC_PATT_UNIFORM        = 8'd85; //U
+  localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79; //O
+  localparam [7:0] TRAFFIC_PATT_RANDOM_PERM    = 8'd82; //R
+
+  string filename;
+  integer node;
+  integer session = 0;
+  integer handle = 0;
+  logic [63:0] start_time;
+  integer total_pkts_recvd = 0, total_pkts_sent = 0;
+
+  // Runs one traffic session and reports the outcome.
+  //   injection_rate   : injection rate in percent handed to sources and sinks
+  //   traffic_patt     : one of the TRAFFIC_PATT_* codes
+  //   lines_per_pkt    : packet size in lines
+  //   num_pkts_to_send : packets generated by each active source
+  // Sequence: apply settings, strobe sinks and sources, wait until all sources
+  // and then all sinks go inactive, wait out any deadlock recovery, then print
+  // (and optionally write to CSV) per-node statistics and assert on errors.
+  task sim_dataflow;
+    input [7:0]   injection_rate;
+    input [7:0]   traffic_patt;
+    input [15:0]  lines_per_pkt;
+    input [31:0]  num_pkts_to_send;
+    begin
+      session = session + 1;
+      $display("--------------- New Simulation ---------------");
+      $display("- Module = %s", ROUTER_IMPL);
+      $display("- Nodes = %00d", ROUTER_PORTS);
+      $display("- Injection Rate = %00d%%", injection_rate);
+      $display("- Traffic Pattern = %c", traffic_patt);
+      $display("- Packet Size = %00d words (%00d bits)", lines_per_pkt, ROUTER_DWIDTH);
+      $display("- Max Packets = %00d", num_pkts_to_send);
+      // Configure settings
+      @(posedge clk);
+      set_injection_rate = injection_rate;
+      set_lines_per_pkt = lines_per_pkt;
+      set_traffic_patt = traffic_patt;
+      set_num_pkts_to_send = num_pkts_to_send;
+      @(posedge clk);
+      // Start the sink then the source
+      $display("Data flow starting...");
+      snk_start_stb = 1;
+      src_start_stb = 1;
+      @(posedge clk);
+      src_start_stb = 0;
+      snk_start_stb = 0;
+      @(posedge clk);
+      start_time = timestamp;
+      // Wait for source blocks to finish generating
+      $display("Waiting for packets to transmit... (may take a while)");
+      while (|src_active) begin
+        @(posedge clk);
+        if (deadlock_re) $display("WARNING: Deadlock detected");
+        if (deadlock_fe) $display("Recovered from deadlock");
+      end
+      // Wait for sink blocks to finish consuming
+      $display("All packets transmitted. Waiting to flush...");
+      while (|snk_active) @(posedge clk);
+      // If router deadlocks then wait for it to recover
+      if (deadlock_detected) begin
+        $display("Waiting for deadlock recovery to finish...");
+        while (deadlock_detected) @(posedge clk);
+      end
+      repeat(set_lines_per_pkt) @(posedge clk);
+      // Record summary to file and print to console
+      $sformat(filename, "%s/info_inj%03d_lpp%05d_traffic%c_sess%04d.csv",
+        FILE_PATH, injection_rate, lines_per_pkt, traffic_patt, session);
+      if (TEST_GEN_LL_FILES == 1) begin
+        handle = $fopen(filename, "w");
+        if (handle == 0) begin
+          $error("Could not open file: %s", filename);
+          $finish();
+        end
+      end
+      if (handle != 0) $fdisplay(handle, "Impl,Node,TxPkts,RxPkts,Duration,ErrRoute,ErrData");
+      total_pkts_sent = 0;
+      total_pkts_recvd = 0;
+      for (node = 0; node < ROUTER_PORTS; node=node+1) begin
+        // A node whose source never ran (node >= NUM_MASTERS) reports a session
+        // duration of 0; show 0% for it instead of dividing by zero, which
+        // would print x.
+        $display("- Node #%03d: TX = %5d pkts, RX = %5d pkts, Inj Rate = %3d%%. Errs = %5d route, %5d data",
+          node,src_pkt_count[node], snk_pkt_count[node],
+          ((session_duration[node] != 64'd0) ? ((src_xfer_count[node]*100)/session_duration[node]) : 64'd0),
+          snk_route_err_count[node], snk_data_err_count[node]);
+        if (handle != 0) $fdisplay(handle, "%s,%00d,%00d,%00d,%00d,%00d,%00d", ROUTER_IMPL,
+          node,src_pkt_count[node], snk_pkt_count[node], session_duration[node],
+          snk_route_err_count[node], snk_data_err_count[node]);
+        total_pkts_sent = total_pkts_sent + src_pkt_count[node];
+        total_pkts_recvd = total_pkts_recvd + snk_pkt_count[node];
+        `ASSERT_ERROR(snk_route_err_count[node] == 0, "Routing errors. Received packets destined to other nodes");
+        `ASSERT_ERROR(snk_data_err_count[node] == 0, "Integrity errors. Received corrupted packets");
+      end
+      $display("Finished. Elapsed = %00d cycles, TX = %00d pkts, RX = %00d pkts",
+        (timestamp - start_time), total_pkts_sent, total_pkts_recvd);
+      `ASSERT_ERROR(total_pkts_recvd == total_pkts_sent, "Total # TX packets did not match the total # RX packets");
+      if (handle != 0) $fclose(handle);
+      $display("----------------------------------------------");
+    end
+  endtask
+
+  //----------------------------------------------------
+  // Main test loop
+  //----------------------------------------------------
+
+  // Module parameters copied into variables for use by the test loop
+  logic [31:0] MAX_PACKETS = TEST_MAX_PACKETS;
+  logic [15:0] LPP = TEST_LPP;
+  integer MIN_INJ_RATE = TEST_MIN_INJ_RATE;
+  integer MAX_INJ_RATE = TEST_MAX_INJ_RATE;
+  integer INJ_RATE_INCR = TEST_INJ_RATE_INCR;
+
+  integer inj_rate = 0;
+  initial begin : tb_main
+    src_start_stb = 0;
+    snk_start_stb = 0;
+    rtr_sb.reset();
+    while (rst) @(posedge clk);
+
+    repeat (10) @(posedge clk);
+
+    // One settings-bus write per port for the two routers configured that way;
+    // the other implementations get no writes here.
+    `TEST_CASE_START("Set up crossbar");
+    for (node = 0; node < ROUTER_PORTS; node=node+1) begin
+      if (ROUTER_IMPL == "axi_crossbar") begin
+        rtr_sb.write(16'd256 + node[15:0], {16'h0, node[15:0]});
+      end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin
+        rtr_sb.write(node[15:0], {16'h0, node[15:0]});
+        while (~rtr_sb_ack) @(posedge clk);
+      end
+    end
+    `TEST_CASE_DONE(1)
+
+    // Each test case below sweeps the injection rate from MIN_INJ_RATE to
+    // MAX_INJ_RATE in steps of INJ_RATE_INCR for one traffic pattern.
+    `TEST_CASE_START("Simulate LOOPBACK Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_LOOPBACK, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_CASE_START("Simulate SEQUENTIAL Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_SEQUENTIAL, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_CASE_START("Simulate UNIFORM Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_CASE_START("Simulate UNIFORM_OTHERS Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM_OTHERS, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_CASE_START("Simulate BIT_COMPLEMENT Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_BIT_COMPLEMENT, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_CASE_START("Simulate NEIGHBOR Traffic Pattern");
+    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
+      sim_dataflow(inj_rate, TRAFFIC_PATT_NEIGHBOR, LPP, MAX_PACKETS);
+    end
+    `TEST_CASE_DONE(1)
+
+    `TEST_BENCH_DONE
+  end // initial begin
+
+endmodule
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py
new file mode 100755
index 000000000..35821c2c4
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/gen_load_latency_graph.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+#
+# Copyright 2018 Ettus Research, A National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+# Description
+# Parses the output files generated by crossbar_tb and outputs
+# an expected-actual throughput graph alongside a load-latency graph
+
+import os, sys
+import argparse
+import time
+import glob
+import csv
+import re
+import numpy as np
+
+import matplotlib
+#matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
def get_options():
    """Parse the command line of the load-latency graph generator.

    Returns:
        argparse.Namespace whose ``datadir`` attribute names the directory
        that is searched for the testbench's CSV output files. It falls
        back to the current directory when no argument is given.
    """
    parser = argparse.ArgumentParser(description='Generate Load Latency Graphs')
    # nargs='?' is what makes the positional optional. Without it argparse
    # insists on the argument and the '.' default can never be used.
    parser.add_argument('datadir', type=str, nargs='?', default='.', help='Location of packet capture files generated by testbench')
    return parser.parse_args()
+
# Single-letter traffic pattern code (as embedded in the CSV filenames)
# mapped to the full pattern name used in graph titles and filenames.
TRAFFIC_PATTERNS = {'U':'UNIFORM', 'O':'UNIFORM_OTHERS', 'N':'NEIGHBOR', 'L':'LOOPBACK', 'S':'SEQUENTIAL', 'C':'BIT_COMPLEMENT', 'R':'RANDOM_PERM'}

class InfoFile():
    """Summary statistics read from one ``info_*.csv`` file.

    The run configuration (injection rate, lines per packet, traffic
    pattern, session) is taken from the filename. The per-node rows of the
    file are summed into ``tx_pkts``, ``rx_pkts``, ``duration`` and ``errs``;
    ``nodes`` counts the rows and ``impl`` holds the ``Impl`` column of the
    last row. ``real_inj_rate`` is ``100 * tx_pkts * lpp / duration``.

    Raises:
        ValueError: if the filename or the CSV header does not have the
            expected format.
    """
    def __init__(self, filename):
        # Extract test info from filename
        m = re.search(r".*/info_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", filename)
        if m is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        self.inj_rate = int(m.group(1))
        self.lpp = int(m.group(2))
        self.traffic_patt = TRAFFIC_PATTERNS[m.group(3)]
        self.session = int(m.group(4))

        # Defined up front so the attribute exists even for a file that
        # carries a header but no data rows.
        self.impl = ''
        self.tx_pkts = 0
        self.rx_pkts = 0
        self.duration = 0
        self.errs = 0
        self.nodes = 0
        with open(filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            header = next(reader, None)
            if header is not None and header != ['Impl', 'Node', 'TxPkts', 'RxPkts', 'Duration', 'ErrRoute', 'ErrData']:
                raise ValueError('Incorrect header: %s'%(filename))
            for row in reader:
                self.impl = row[0]
                self.tx_pkts += int(row[2])
                # Accumulate onto rx_pkts itself; adding to tx_pkts here
                # would report the TX total plus only the last node's RX.
                self.rx_pkts += int(row[3])
                self.duration += int(row[4])
                self.errs += int(row[5]) + int(row[6])
                self.nodes += 1
        if self.duration > 0:
            self.real_inj_rate = (100.0 * self.tx_pkts * self.lpp) / self.duration
        else:
            # No rows (or zero total duration): avoid a ZeroDivisionError
            self.real_inj_rate = 0.0
+
class PktFile():
    """Latency samples read from one ``pkts_node*.csv`` capture file.

    The node number and run configuration come from the filename; the
    ``Latency`` column of every data row is collected, in file order, into
    ``latencies``.

    Raises:
        ValueError: if the filename or the CSV header does not have the
            expected format.
    """
    def __init__(self, filename):
        # Decode the run configuration that is encoded in the filename
        name_fields = re.search(r".*/pkts_node([0-9]+)_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", filename)
        if name_fields is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        node, inj_rate, lpp, patt_code, session = name_fields.groups()
        self.node = int(node)
        self.inj_rate = int(inj_rate)
        self.lpp = int(lpp)
        self.traffic_patt = TRAFFIC_PATTERNS[patt_code]
        self.session = int(session)

        self.latencies = []
        with open(filename, 'r') as csvfile:
            rows = csv.reader(csvfile, delimiter=',')
            # The first row must be the column header; a completely empty
            # file is accepted and simply yields no samples.
            header = next(rows, None)
            if header is not None and header != ['Src', 'Dst', 'Seqno', 'Error', 'Latency']:
                raise ValueError('Incorrect header: %s'%(filename))
            for row in rows:
                self.latencies.append(int(row[4]))
+
+
+########################################################################
+# main
+########################################################################
if __name__=='__main__':
    options = get_options()

    if (not os.path.isdir(options.datadir)):
        print('ERROR: Data directory %s does not exist'%(options.datadir))
        sys.exit(1)

    # Summary files, indexed by (lpp, traffic pattern, offered injection rate)
    info_db = dict()
    info_files = glob.glob(os.path.join(options.datadir, 'info*.csv'))
    router_impl = ''
    lines_per_pkt = 0
    for ifile in info_files:
        print('INFO: Reading %s...'%(ifile))
        tmp = InfoFile(ifile)
        router_impl = tmp.impl      # Assume that all files have the same impl
        lines_per_pkt = tmp.lpp     # Assume that all files have the same LPP
        info_db[(tmp.lpp, tmp.traffic_patt, tmp.inj_rate)] = tmp

    # Latency samples: pkt_db[(lpp, traffic pattern)][offered rate] is the
    # list of latencies pooled over every node's capture file.
    pkt_db = dict()
    pkts_files = glob.glob(os.path.join(options.datadir, 'pkts*.csv'))
    for pfile in pkts_files:
        print('INFO: Reading %s...'%(pfile))
        tmp = PktFile(pfile)
        config_key = (tmp.lpp, tmp.traffic_patt)
        if config_key not in pkt_db:
            pkt_db[config_key] = dict()
        if tmp.inj_rate not in pkt_db[config_key]:
            pkt_db[config_key][tmp.inj_rate] = []
        pkt_db[config_key][tmp.inj_rate].extend(tmp.latencies)

    # Write load-latency plots to file
    actual_inj_rate_db = dict()
    for config in sorted(pkt_db):
        (lpp, traffic_patt) = config
        ll_file = 'load-latency_%s_traffic-%s_lpp-%d.png'%(router_impl, traffic_patt, lpp)
        print('INFO: Writing file ' + ll_file + '...')
        percentile = [0, 25, 50, 75, 90, 95, 99, 99.9, 100]
        plt.figure()
        plt.title('Load Latency Graph for %s\n(Traffic: %s, LPP: %d)'%(router_impl, traffic_patt, lpp))
        # One curve per percentile, plotted against the measured
        # (not the offered) injection rate.
        for p in percentile:
            plot_data = dict()
            for inj_rate in pkt_db[config]:
                real_inj_rate = info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate
                plot_data[real_inj_rate] = np.percentile(pkt_db[config][inj_rate], p)
            latencies = []
            rates = []
            for inj_rate in sorted(plot_data):
                rates.append(inj_rate)
                latencies.append(plot_data[inj_rate])
            plt.plot(rates, latencies, label='$P_{%.1f}$'%(p))
        plt.xlabel('Load (%)')
        plt.xticks(range(0, 110, 10))
        plt.ylabel('Latency (cycles)')
        plt.grid(True)
        plt.legend()
        plt.savefig(os.path.join(options.datadir, ll_file), dpi=120)
        # The figure has been written out; release it instead of keeping
        # one open figure per configuration until the script exits.
        plt.close()
        # Generate actual inj_rate graph
        real_inj_rates = []
        for inj_rate in sorted(pkt_db[config]):
            real_inj_rates.append(info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate)
        actual_inj_rate_db[config] = (sorted(pkt_db[config]), real_inj_rates)

    # Write offered vs actual injection rate plots to file
    injrate_file = 'injection-rate_%s_lpp-%d.png'%(router_impl, lines_per_pkt)
    print('INFO: Writing file ' + injrate_file + '...')
    plt.figure()
    plt.title('Max Injection Rate Graph for %s'%(router_impl))
    for config in actual_inj_rate_db:
        (x, y) = actual_inj_rate_db[config]
        plt.plot(x, y, label=str(config))
    plt.xlabel('Offered Injection Rate (%)')
    plt.xticks(range(0, 110, 10))
    plt.ylabel('Accepted Injection Rate (%)')
    plt.yticks(range(0, 110, 10))
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(options.datadir, injrate_file), dpi=120)
    plt.close()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py
new file mode 100755
index 000000000..8e546fef9
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+# Description
+# Run the crossbar testbench (crossbar_tb) for various parameter
+# configurations and generates load-latency graphs for each run.
+
+import argparse
+import math
+import os, sys
+import shutil
+import glob
+import subprocess
+
# Verilog source of the auto-generated testbench top level. Each {name}
# placeholder is substituted with str.format() in launch_run().
g_tb_top_template = """
`timescale 1ns/1ps
module crossbar_tb_auto();
 crossbar_tb #(
 .TEST_NAME ("crossbar_tb_auto"),
 .ROUTER_IMPL ("{rtr_impl}"),
 .ROUTER_PORTS ({rtr_ports}),
 .ROUTER_DWIDTH ({rtr_width}),
 .MTU_LOG2 ({rtr_mtu}),
 .NUM_MASTERS ({rtr_sources}),
 .TEST_MAX_PACKETS ({tst_maxpkts}),
 .TEST_LPP ({tst_lpp}),
 .TEST_MIN_INJ_RATE ({tst_injrate_min}),
 .TEST_MAX_INJ_RATE ({tst_injrate_max}),
 .TEST_INJ_RATE_INCR (10),
 .TEST_GEN_LL_FILES (1)
 ) impl (
 /* no IO */
 );
endmodule
"""

# Template values that depend only on the crossbar type ('data' or 'ctrl').
# The keys of the inner dicts match placeholders in g_tb_top_template.
g_test_params = {
    'data': {'rtr_width':64, 'rtr_mtu':7, 'tst_maxpkts':100, 'tst_lpp':100, 'tst_injrate_min':30, 'tst_injrate_max':100},
    'ctrl': {'rtr_width':64, 'rtr_mtu':5, 'tst_maxpkts':100, 'tst_lpp':20, 'tst_injrate_min':10, 'tst_injrate_max':50},
}

# Crossbar type of every supported implementation name; the value selects
# the parameter set in g_test_params.
g_xb_types = {
    'chdr_crossbar_nxn':'data', 'axi_crossbar':'data',
    'axis_ctrl_2d_torus':'ctrl', 'axis_ctrl_2d_mesh':'ctrl'
}
+
def get_options():
    """Parse the command line of the multi-run simulation driver.

    Returns:
        argparse.Namespace with the string attributes ``impl``, ``ports``
        and ``sources``, each of which main() splits on commas.
    """
    impl_help = 'Implementation (CSV) [%s]'%(','.join(g_xb_types))
    cli = argparse.ArgumentParser(description='Run correctness sim and generate load-latency plots')
    cli.add_argument('--impl', type=str, default='chdr_crossbar_nxn', help=impl_help)
    cli.add_argument('--ports', type=str, default='16', help='Number of ports (CSV)')
    cli.add_argument('--sources', type=str, default='16', help='Number of active data sources (masters)')
    options = cli.parse_args()
    return options
+
def launch_run(impl, ports, sources):
    """Simulate one crossbar configuration and export its results.

    Writes crossbar_tb_auto.sv from g_tb_top_template, runs "make xsim" on
    it, runs gen_load_latency_graph.py on the scratch directory
    data/<impl>, then moves xsim.log and copies the generated PNG files to
    data/<impl>_ports<ports>_srcs<sources>. Finally the generated testbench
    file, the build outputs ("make cleanall") and the scratch directory are
    removed.

    Args:
        impl: Implementation name; must be a key of g_xb_types.
        ports: Number of router ports (int).
        sources: Number of active sources (int).

    Raises:
        KeyError: if impl is not a key of g_xb_types.
        RuntimeError: if the simulation or the graph generation exits with
            a non-zero status.
    """
    run_name = '%s_ports%d_srcs%d'%(impl, ports, sources)
    # Prepare a transform map to autogenerate a TB file
    transform = {'rtr_impl':impl, 'rtr_ports':ports, 'rtr_sources':sources}
    transform.update(g_test_params[g_xb_types[impl]])
    # Create crossbar_tb_auto.sv with specified parameters
    with open('crossbar_tb_auto.sv', 'w') as out_file:
        out_file.write(g_tb_top_template.format(**transform))
    # Create data directory for the simulation
    data_dir = os.path.join('data', impl)
    export_dir = os.path.join('data', run_name)
    # data_dir is scratch space that is deleted again at the end of a run.
    # A run that failed part-way leaves it behind, which used to make every
    # later run for the same implementation die in os.makedirs(); start
    # from a clean directory instead. makedirs() also creates 'data'.
    shutil.rmtree(data_dir, ignore_errors=True)
    os.makedirs(data_dir)
    # Re-running a configuration refreshes its previously exported results
    os.makedirs(export_dir, exist_ok=True)
    # Run "make xsim"
    exitcode = subprocess.Popen('make xsim TB_TOP_MODULE=crossbar_tb_auto', shell=True).wait()
    if exitcode != 0:
        raise RuntimeError('Error running "make xsim". Was setupenv.sh run?')
    # Generate load-latency graphs
    exitcode = subprocess.Popen('gen_load_latency_graph.py ' + data_dir, shell=True).wait()
    if exitcode != 0:
        raise RuntimeError('Error running "gen_load_latency_graph.py"')
    # Copy files
    os.rename('xsim.log', os.path.join(export_dir, 'xsim.log'))
    for file in glob.glob(os.path.join(data_dir, '*.png')):
        shutil.copy(file, export_dir)
    # Cleanup outputs
    subprocess.Popen('make cleanall', shell=True).wait()
    try:
        os.remove('crossbar_tb_auto.sv')
    except FileNotFoundError:
        pass
    try:
        shutil.rmtree(data_dir)
    except OSError:
        print('WARNING: Could not delete ' + data_dir)
+
def main():
    """Launch one run for every (impl, ports, sources) combination.

    Each of the three options is a comma-separated list. The number of
    sources passed to launch_run() is capped at the number of ports.
    """
    args = get_options()
    impl_names = args.impl.strip().split(',')
    port_fields = args.ports.strip().split(',')
    source_fields = args.sources.strip().split(',')
    for impl in impl_names:
        for port_field in port_fields:
            for source_field in source_fields:
                num_ports = int(port_field)
                launch_run(impl, num_ports, min(num_ports, int(source_field)))

if __name__ == '__main__':
    main()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py
new file mode 100755
index 000000000..a2eaf71fb
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python3
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+import argparse
+import math
+import sys
+import datetime
+import random
+
+# Parse command line options
+# ------------------------------------------------
def get_options():
    """Parse the command line of the node-to-coordinate mapping generator.

    Returns:
        argparse.Namespace with ``pattern`` (one of 'xy', 'yx', 'spiral',
        'random'), ``dimsize`` (int) and ``seed`` (int or None).
    """
    cli = argparse.ArgumentParser(description='Generate a node to coordinate mapping file.')
    cli.add_argument(
        '--pattern', type=str, default='xy',
        choices=['xy', 'yx', 'spiral', 'random'],
        help='Node distribution pattern')
    cli.add_argument(
        '--dimsize', type=int, default=4,
        help='Maximum dimension size')
    cli.add_argument(
        '--seed', type=int, default=None,
        help='Seed for random permutation generator')
    options = cli.parse_args()
    return options
+
+# Pattern Generators
+# ------------------------------------------------
def gen_xy(N):
    """Number the nodes of an N x N grid along X first, then Y.

    Returns:
        dict mapping node number ``y*N + x`` to the coordinate ``(x, y)``.
    """
    return {row*N + col: (col, row) for row in range(N) for col in range(N)}
+
def gen_yx(N):
    """Number the nodes of an N x N grid along Y first, then X.

    Returns:
        dict mapping node number ``x*N + y`` to the coordinate ``(x, y)``.
    """
    return {col*N + row: (col, row) for row in range(N) for col in range(N)}
+
def gen_spiral(N):
    """Number the nodes of an N x N grid along a spiral.

    The walk starts at the origin of a grid centred on (0, 0) and turns at
    the corners of the spiral; visited positions are shifted so that the
    returned coordinates are non-negative.

    Returns:
        dict mapping the step index to the shifted coordinate ``(x, y)``.
        Steps that fall outside the grid are not recorded.
    """
    half = N/2
    shift = int(math.ceil(N/2)) - 1
    placement = {}
    col, row = 0, 0
    step_col, step_row = 0, -1
    for index in range(N**2):
        inside = (-half < col <= half) and (-half < row <= half)
        if inside:
            placement[index] = (col + shift, row + shift)
        # Positions at which the walk changes direction
        on_corner = (col == row) or (col < 0 and col == -row) or (col > 0 and col == 1 - row)
        if on_corner:
            step_col, step_row = -step_row, step_col
        col += step_col
        row += step_row
    return placement
+
def gen_random(N):
    """Assign the node numbers 0..N*N-1 to the grid in random order.

    Uses the module-level ``random`` generator, so the result is
    reproducible for a given seed.

    Returns:
        dict mapping node number to the coordinate ``(x, y)``.
    """
    shuffled = random.sample(range(N*N), N*N)
    return {shuffled[col*N + row]: (col, row) for row in range(N) for col in range(N)}
+
+# Source Generators
+# ------------------------------------------------
def layout_nodes(nodes):
    """Render a node map as text, one string per grid row.

    Args:
        nodes: dict mapping node number to ``(x, y)``; expected to cover a
            full square grid.

    Returns:
        list of strings; row ``y`` holds the node numbers at x = 0, 1, ...
        each right-aligned in a 5-character column.
    """
    side = int(math.sqrt(len(nodes)))
    # Invert the map so that a node can be looked up by its coordinate
    node_at = dict((coord, node) for node, coord in nodes.items())
    return [
        ''.join('%5d'%(node_at[(x, y)]) for x in range(side))
        for y in range(side)
    ]
+
def gen_vparams(nodes, N, pattern):
    """Build the Verilog localparams holding one dimension size's mapping.

    Args:
        nodes: dict mapping node number to ``(x, y)``.
        N: Dimension size of the grid.
        pattern: Name of the distribution pattern (comment text only).

    Returns:
        list of source lines: a comment banner with the grid layout, then
        the XCOORD_DIM_<N> and YCOORD_DIM_<N> localparams and an empty
        string. The coordinates are concatenated from the highest node
        number down to node 0.
    """
    banner = '\n// DIM_SIZE = %d, PATTERN = %s'%(N,pattern.upper())
    src_lines = [ banner, '//------------------------------------' ]
    src_lines.extend('// ' + row_text for row_text in layout_nodes(nodes))
    bitw = math.ceil(math.log2(N))
    # Highest node number first, so that node 0 ends up in the LSBs
    descending = sorted(nodes.items(), reverse=True)
    xvals = ','.join('%d\'d%d'%(bitw,coord[0]) for _, coord in descending)
    yvals = ','.join('%d\'d%d'%(bitw,coord[1]) for _, coord in descending)
    msb = bitw*N*N-1
    src_lines.append('localparam [%d:0] XCOORD_DIM_%03d = {%s};'%(msb, N, xvals))
    src_lines.append('localparam [%d:0] YCOORD_DIM_%03d = {%s};'%(msb, N, yvals))
    src_lines.append('')
    return src_lines
+
def gen_lookup_func(dim, N):
    """Build the Verilog function that looks up one coordinate of a node.

    Args:
        dim: Dimension letter ('x' or 'y' in this script); selects the
            function name and the <DIM>COORD_DIM_* table.
        N: Largest dimension size to emit a branch for (branches start at 2).

    Returns:
        list of source lines for ``node_to_<dim>dst``: an if/else-if chain
        on DIM_SIZE that slices the matching table, with an all-zero
        fallback.
    """
    body = [ 'function [CLOG2_DIM_SIZE-1:0] node_to_%sdst;'%(dim), ' input [WIDTH-1:0] header;', 'begin']
    first_size = 2
    for size in range(first_size, N+1):
        node_bitw = math.ceil(math.log2(size*size))
        dim_bitw = math.ceil(math.log2(size))
        if size == first_size:
            lead = ' '
        else:
            lead = ' else '
        body.append(lead + 'if (DIM_SIZE == %d)'%(size))
        body.append(' node_to_%sdst = %sCOORD_DIM_%03d[%d*header[%d:0] +: %d];'%(dim,dim.upper(),size,dim_bitw,node_bitw-1,dim_bitw))
    body += [
        ' else',
        ' node_to_%sdst = {CLOG2_DIM_SIZE{1\'d0}};'%(dim),
        'end endfunction\n\n',
    ]
    return body
+
def gen_vheader(dimsize, mapgen, pattern, filename):
    """Write the Verilog header with the node-to-coordinate mappings.

    Args:
        dimsize: Largest dimension size to generate tables for (from 2 up).
        mapgen: Callable that takes a dimension size and returns a dict of
            node number to ``(x, y)``.
        pattern: Name of the distribution pattern, used in comments.
        filename: Path of the file to (over)write.
    """
    with open(filename, 'w') as vhfile:
        preamble = [
            '// Copyright %s Ettus Research, A National Instruments Company\n'%(datetime.datetime.now().year),
            '// SPDX-License-Identifier: LGPL-3.0-or-later\n',
            '//\n',
            '// Autogenerated file. Do not modify.\n',
            '// $ %s\n'%(' '.join(sys.argv[:])),
            '\nparameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround\n\n',
        ]
        for text in preamble:
            vhfile.write(text)
        # One pair of coordinate tables per supported dimension size
        for size in range(2, dimsize+1):
            nodes = mapgen(size)
            N = math.sqrt(len(nodes))
            vhfile.write('\n'.join(gen_vparams(nodes, N, pattern)))
        vhfile.write('\n\n')
        # Lookup functions that pick the table matching DIM_SIZE
        for dim in ('x', 'y'):
            vhfile.write('\n'.join(gen_lookup_func(dim, dimsize)))
+
def main():
    """Generate mesh_node_mapping.vh for the pattern chosen on the command line."""
    args = get_options()
    random.seed(args.seed)
    # Pattern name -> generator of the node-to-coordinate map
    generators = {
        'xy': gen_xy,
        'yx': gen_yx,
        'spiral': gen_spiral,
        'random': gen_random,
    }
    mapgen = generators[args.pattern]
    gen_vheader(args.dimsize, mapgen, args.pattern, 'mesh_node_mapping.vh')

if __name__ == '__main__':
    main()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v
new file mode 100644
index 000000000..e0338347b
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v
@@ -0,0 +1,481 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: mesh_2d_dor_router_multi_sw
+// Description:
+// Alternate implementation for mesh_2d_dor_router_single_sw with
+// multiple switches for independent paths between inputs and outputs
+// **NOTE**: This module has not been validated
+
// Structure: every ingress port (terminal, west, east, north, south) is
// buffered and then fed to its own axi_demux, whose select value comes from
// one of the routing functions below. Every egress port has its own axi_mux
// that merges the demux outputs destined for it.
//
// Parameters:
//   - WIDTH: Width of the AXI-Stream data bus
//   - DIM_SIZE: Dimension size; sets the width of the X/Y coordinates
//   - XB_ADDR_X: The X-coordinate of this router
//   - XB_ADDR_Y: The Y-coordinate of this router
//   - TERM_BUFF_SIZE: SIZE parameter of the terminal ingress packet gate
//   - XB_BUFF_SIZE: SIZE parameter of the inter-router ingress buffers
//   - ROUTING_ALLOC: Passed as ROUTING to the inter-router ingress buffers
//   - SWITCH_ALLOC: Not referenced anywhere in this module
//
// Demux select encoding (the rightmost entry of an o_t* concatenation is
// output 0, as used by all five demuxes below):
//   - Terminal ingress: 0=TER, 1=WST, 2=EST, 3=NOR, 4=SOU
//   - West/east ingress: 0=TER, 1=forward in X, 2=NOR, 3=SOU
//   - North/south ingress: 0=TER, 1=forward in Y

module mesh_2d_dor_router_multi_sw #(
  parameter                        WIDTH          = 64,
  parameter                        DIM_SIZE       = 4,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X      = 0,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y      = 0,
  parameter                        TERM_BUFF_SIZE = 5,
  parameter                        XB_BUFF_SIZE   = 5,
  parameter                        ROUTING_ALLOC  = "WORMHOLE",  // Routing (switching) method {WORMHOLE, CUT-THROUGH}
  parameter                        SWITCH_ALLOC   = "PRIO"       // Switch allocation algorithm {ROUND-ROBIN, PRIO}
) (
  // Clocks and resets
  input  wire             clk,
  input  wire             reset,

  // Terminal connections
  input  wire [WIDTH-1:0] s_axis_ter_tdata,
  input  wire             s_axis_ter_tlast,
  input  wire             s_axis_ter_tvalid,
  output wire             s_axis_ter_tready,
  output wire [WIDTH-1:0] m_axis_ter_tdata,
  output wire             m_axis_ter_tlast,
  output wire             m_axis_ter_tvalid,
  input  wire             m_axis_ter_tready,

  // West inter-router connections
  input  wire [WIDTH-1:0] s_axis_wst_tdata,
  input  wire [0:0]       s_axis_wst_tdest,
  input  wire             s_axis_wst_tlast,
  input  wire             s_axis_wst_tvalid,
  output wire             s_axis_wst_tready,
  output wire [WIDTH-1:0] m_axis_wst_tdata,
  output wire [0:0]       m_axis_wst_tdest,
  output wire             m_axis_wst_tlast,
  output wire             m_axis_wst_tvalid,
  input  wire             m_axis_wst_tready,

  // East inter-router connections
  input  wire [WIDTH-1:0] s_axis_est_tdata,
  input  wire [0:0]       s_axis_est_tdest,
  input  wire             s_axis_est_tlast,
  input  wire             s_axis_est_tvalid,
  output wire             s_axis_est_tready,
  output wire [WIDTH-1:0] m_axis_est_tdata,
  output wire [0:0]       m_axis_est_tdest,
  output wire             m_axis_est_tlast,
  output wire             m_axis_est_tvalid,
  input  wire             m_axis_est_tready,

  // North inter-router connections
  input  wire [WIDTH-1:0] s_axis_nor_tdata,
  input  wire [0:0]       s_axis_nor_tdest,
  input  wire             s_axis_nor_tlast,
  input  wire             s_axis_nor_tvalid,
  output wire             s_axis_nor_tready,
  output wire [WIDTH-1:0] m_axis_nor_tdata,
  output wire [0:0]       m_axis_nor_tdest,
  output wire             m_axis_nor_tlast,
  output wire             m_axis_nor_tvalid,
  input  wire             m_axis_nor_tready,

  // South inter-router connections
  input  wire [WIDTH-1:0] s_axis_sou_tdata,
  input  wire [0:0]       s_axis_sou_tdest,
  input  wire             s_axis_sou_tlast,
  input  wire             s_axis_sou_tvalid,
  output wire             s_axis_sou_tready,
  output wire [WIDTH-1:0] m_axis_sou_tdata,
  output wire [0:0]       m_axis_sou_tdest,
  output wire             m_axis_sou_tlast,
  output wire             m_axis_sou_tvalid,
  input  wire             m_axis_sou_tready
);
  // -------------------------------------------------
  // Routing functions
  // -------------------------------------------------
  // Provides node_to_xdst() and node_to_ydst()
  `include "mesh_node_mapping.vh"

  // Route for packets entering from the terminal: resolve the X dimension
  // first, then Y, and loop back when the destination is this router.
  function [2:0] term_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
    begin
      xdst  = node_to_xdst(header);
      ydst  = node_to_ydst(header);
      xdiff = xdst - XB_ADDR_X;
      ydiff = ydst - XB_ADDR_Y;
      // Routing logic
      if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
        term_route = 3'd0; //TER
      end else if (xdst == XB_ADDR_X) begin
        if (ydiff < 0)
          term_route = 3'd3; //NOR
        else
          term_route = 3'd4; //SOU
      end else begin
        if (xdiff < 0)
          term_route = 3'd1; //WST
        else
          term_route = 3'd2; //EST
      end
    end
  endfunction

  // Route for packets entering from the west or east link: keep going in X
  // until the destination column is reached, then turn into Y or deliver.
  function [1:0] xdim_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] ydiff;
    begin
      xdst  = node_to_xdst(header);
      ydst  = node_to_ydst(header);
      ydiff = ydst - XB_ADDR_Y;
      // Routing logic
      if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
        xdim_route = 2'd0; //TER
      end else if (xdst == XB_ADDR_X) begin
        if (ydiff < 0)
          xdim_route = 2'd2; //NOR
        else
          xdim_route = 2'd3; //SOU
      end else begin
        xdim_route = 2'd1; //Forward
      end
    end
  endfunction

  // Route for packets entering from the north or south link: deliver when
  // the destination is this router, otherwise keep going in Y. The result
  // is assigned on every path so that it can never be X; a header whose X
  // destination does not match this column is forwarded as well.
  function [0:0] ydim_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    begin
      xdst = node_to_xdst(header);
      ydst = node_to_ydst(header);
      // Routing logic
      if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
        ydim_route = 1'd0; //TER
      end else begin
        ydim_route = 1'd1; //Forward
      end
    end
  endfunction


  // -------------------------------------------------
  // Input buffers
  // -------------------------------------------------
  wire [WIDTH-1:0] ter_i_tdata;
  wire             ter_i_tlast;
  wire             ter_i_tvalid;
  wire             ter_i_tready;

  axi_packet_gate #(
    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
  ) term_in_pkt_gate_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  (s_axis_ter_tdata),
    .i_tlast  (s_axis_ter_tlast),
    .i_tvalid (s_axis_ter_tvalid),
    .i_tready (s_axis_ter_tready),
    .i_terror (1'b0),
    .o_tdata  (ter_i_tdata),
    .o_tlast  (ter_i_tlast),
    .o_tvalid (ter_i_tvalid),
    .o_tready (ter_i_tready)
  );

  wire [WIDTH-1:0] wst_i_tdata,  est_i_tdata,  nor_i_tdata,  sou_i_tdata;
  wire             wst_i_tlast,  est_i_tlast,  nor_i_tlast,  sou_i_tlast;
  wire             wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
  wire             wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) wst_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_wst_tdata),
    .s_axis_tdest  (s_axis_wst_tdest),
    .s_axis_tlast  (s_axis_wst_tlast),
    .s_axis_tvalid (s_axis_wst_tvalid),
    .s_axis_tready (s_axis_wst_tready),
    .m_axis_tdata  (wst_i_tdata),
    .m_axis_tlast  (wst_i_tlast),
    .m_axis_tvalid (wst_i_tvalid),
    .m_axis_tready (wst_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) est_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_est_tdata),
    .s_axis_tdest  (s_axis_est_tdest),
    .s_axis_tlast  (s_axis_est_tlast),
    .s_axis_tvalid (s_axis_est_tvalid),
    .s_axis_tready (s_axis_est_tready),
    .m_axis_tdata  (est_i_tdata),
    .m_axis_tlast  (est_i_tlast),
    .m_axis_tvalid (est_i_tvalid),
    .m_axis_tready (est_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) nor_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_nor_tdata),
    .s_axis_tdest  (s_axis_nor_tdest),
    .s_axis_tlast  (s_axis_nor_tlast),
    .s_axis_tvalid (s_axis_nor_tvalid),
    .s_axis_tready (s_axis_nor_tready),
    .m_axis_tdata  (nor_i_tdata),
    .m_axis_tlast  (nor_i_tlast),
    .m_axis_tvalid (nor_i_tvalid),
    .m_axis_tready (nor_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) sou_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_sou_tdata),
    .s_axis_tdest  (s_axis_sou_tdest),
    .s_axis_tlast  (s_axis_sou_tlast),
    .s_axis_tvalid (s_axis_sou_tvalid),
    .s_axis_tready (s_axis_sou_tready),
    .m_axis_tdata  (sou_i_tdata),
    .m_axis_tlast  (sou_i_tlast),
    .m_axis_tvalid (sou_i_tvalid),
    .m_axis_tready (sou_i_tready)
  );

  // -------------------------------------------------
  // Input demuxes
  // -------------------------------------------------
  // Naming: <src>2<dst>_* is the stream from ingress <src> to egress <dst>
  // (t = terminal, w = west, e = east, n = north, s = south).

  wire [WIDTH-1:0] t2t_tdata,  t2w_tdata,  t2e_tdata,  t2n_tdata,  t2s_tdata;
  wire             t2t_tlast,  t2w_tlast,  t2e_tlast,  t2n_tlast,  t2s_tlast;
  wire             t2t_tvalid, t2w_tvalid, t2e_tvalid, t2n_tvalid, t2s_tvalid;
  wire             t2t_tready, t2w_tready, t2e_tready, t2n_tready, t2s_tready;

  wire [WIDTH-1:0] w2t_tdata,  w2e_tdata,  w2n_tdata,  w2s_tdata;
  wire             w2t_tlast,  w2e_tlast,  w2n_tlast,  w2s_tlast;
  wire             w2t_tvalid, w2e_tvalid, w2n_tvalid, w2s_tvalid;
  wire             w2t_tready, w2e_tready, w2n_tready, w2s_tready;

  wire [WIDTH-1:0] e2t_tdata,  e2w_tdata,  e2n_tdata,  e2s_tdata;
  wire             e2t_tlast,  e2w_tlast,  e2n_tlast,  e2s_tlast;
  wire             e2t_tvalid, e2w_tvalid, e2n_tvalid, e2s_tvalid;
  wire             e2t_tready, e2w_tready, e2n_tready, e2s_tready;

  wire [WIDTH-1:0] n2t_tdata,  n2s_tdata;
  wire             n2t_tlast,  n2s_tlast;
  wire             n2t_tvalid, n2s_tvalid;
  wire             n2t_tready, n2s_tready;

  wire [WIDTH-1:0] s2t_tdata,  s2n_tdata;
  wire             s2t_tlast,  s2n_tlast;
  wire             s2t_tvalid, s2n_tvalid;
  wire             s2t_tready, s2n_tready;

  // Packet headers; each is connected only to the .header port of its
  // demux and to the routing function that computes that demux's .dest
  wire [WIDTH-1:0] ter_i_hdr, wst_i_hdr, est_i_hdr, nor_i_hdr, sou_i_hdr;

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(5),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) ter_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (ter_i_hdr),
    .dest     (term_route(ter_i_hdr)),
    .i_tdata  (ter_i_tdata),
    .i_tlast  (ter_i_tlast),
    .i_tvalid (ter_i_tvalid),
    .i_tready (ter_i_tready),
    .o_tdata  ({t2s_tdata,  t2n_tdata,  t2e_tdata,  t2w_tdata,  t2t_tdata}),
    .o_tlast  ({t2s_tlast,  t2n_tlast,  t2e_tlast,  t2w_tlast,  t2t_tlast}),
    .o_tvalid ({t2s_tvalid, t2n_tvalid, t2e_tvalid, t2w_tvalid, t2t_tvalid}),
    .o_tready ({t2s_tready, t2n_tready, t2e_tready, t2w_tready, t2t_tready})
  );

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) wst_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (wst_i_hdr),
    .dest     (xdim_route(wst_i_hdr)),
    .i_tdata  (wst_i_tdata),
    .i_tlast  (wst_i_tlast),
    .i_tvalid (wst_i_tvalid),
    .i_tready (wst_i_tready),
    .o_tdata  ({w2s_tdata,  w2n_tdata,  w2e_tdata,  w2t_tdata}),
    .o_tlast  ({w2s_tlast,  w2n_tlast,  w2e_tlast,  w2t_tlast}),
    .o_tvalid ({w2s_tvalid, w2n_tvalid, w2e_tvalid, w2t_tvalid}),
    .o_tready ({w2s_tready, w2n_tready, w2e_tready, w2t_tready})
  );

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) est_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (est_i_hdr),
    .dest     (xdim_route(est_i_hdr)),
    .i_tdata  (est_i_tdata),
    .i_tlast  (est_i_tlast),
    .i_tvalid (est_i_tvalid),
    .i_tready (est_i_tready),
    .o_tdata  ({e2s_tdata,  e2n_tdata,  e2w_tdata,  e2t_tdata}),
    .o_tlast  ({e2s_tlast,  e2n_tlast,  e2w_tlast,  e2t_tlast}),
    .o_tvalid ({e2s_tvalid, e2n_tvalid, e2w_tvalid, e2t_tvalid}),
    .o_tready ({e2s_tready, e2n_tready, e2w_tready, e2t_tready})
  );

  // ydim_route returns 0 for TER and 1 for forward, so the terminal stream
  // must be the rightmost (index 0) entry, exactly as in the three demuxes
  // above. Listing it first would deliver forwarded packets to the terminal
  // and forward the packets addressed to this router.
  axi_demux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) nor_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (nor_i_hdr),
    .dest     (ydim_route(nor_i_hdr)),
    .i_tdata  (nor_i_tdata),
    .i_tlast  (nor_i_tlast),
    .i_tvalid (nor_i_tvalid),
    .i_tready (nor_i_tready),
    .o_tdata  ({n2s_tdata,  n2t_tdata}),
    .o_tlast  ({n2s_tlast,  n2t_tlast}),
    .o_tvalid ({n2s_tvalid, n2t_tvalid}),
    .o_tready ({n2s_tready, n2t_tready})
  );

  // Same ordering as nor_i_demux_i: index 0 = terminal, index 1 = forward
  axi_demux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) sou_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (sou_i_hdr),
    .dest     (ydim_route(sou_i_hdr)),
    .i_tdata  (sou_i_tdata),
    .i_tlast  (sou_i_tlast),
    .i_tvalid (sou_i_tvalid),
    .i_tready (sou_i_tready),
    .o_tdata  ({s2n_tdata,  s2t_tdata}),
    .o_tlast  ({s2n_tlast,  s2t_tlast}),
    .o_tvalid ({s2n_tvalid, s2t_tvalid}),
    .o_tready ({s2n_tready, s2t_tready})
  );

  // -------------------------------------------------
  // Output muxes
  // -------------------------------------------------
  // The tdest output of every inter-router link is tied to 0.

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(5),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) ter_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2t_tdata,  w2t_tdata,  e2t_tdata,  n2t_tdata,  s2t_tdata}),
    .i_tlast  ({t2t_tlast,  w2t_tlast,  e2t_tlast,  n2t_tlast,  s2t_tlast}),
    .i_tvalid ({t2t_tvalid, w2t_tvalid, e2t_tvalid, n2t_tvalid, s2t_tvalid}),
    .i_tready ({t2t_tready, w2t_tready, e2t_tready, n2t_tready, s2t_tready}),
    .o_tdata  (m_axis_ter_tdata),
    .o_tlast  (m_axis_ter_tlast),
    .o_tvalid (m_axis_ter_tvalid),
    .o_tready (m_axis_ter_tready)
  );

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) wst_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2w_tdata,  e2w_tdata}),
    .i_tlast  ({t2w_tlast,  e2w_tlast}),
    .i_tvalid ({t2w_tvalid, e2w_tvalid}),
    .i_tready ({t2w_tready, e2w_tready}),
    .o_tdata  (m_axis_wst_tdata),
    .o_tlast  (m_axis_wst_tlast),
    .o_tvalid (m_axis_wst_tvalid),
    .o_tready (m_axis_wst_tready)
  );
  assign m_axis_wst_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) est_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2e_tdata,  w2e_tdata}),
    .i_tlast  ({t2e_tlast,  w2e_tlast}),
    .i_tvalid ({t2e_tvalid, w2e_tvalid}),
    .i_tready ({t2e_tready, w2e_tready}),
    .o_tdata  (m_axis_est_tdata),
    .o_tlast  (m_axis_est_tlast),
    .o_tvalid (m_axis_est_tvalid),
    .o_tready (m_axis_est_tready)
  );
  assign m_axis_est_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) nor_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2n_tdata,  w2n_tdata,  e2n_tdata,  s2n_tdata}),
    .i_tlast  ({t2n_tlast,  w2n_tlast,  e2n_tlast,  s2n_tlast}),
    .i_tvalid ({t2n_tvalid, w2n_tvalid, e2n_tvalid, s2n_tvalid}),
    .i_tready ({t2n_tready, w2n_tready, e2n_tready, s2n_tready}),
    .o_tdata  (m_axis_nor_tdata),
    .o_tlast  (m_axis_nor_tlast),
    .o_tvalid (m_axis_nor_tvalid),
    .o_tready (m_axis_nor_tready)
  );
  assign m_axis_nor_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) sou_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2s_tdata,  w2s_tdata,  e2s_tdata,  n2s_tdata}),
    .i_tlast  ({t2s_tlast,  w2s_tlast,  e2s_tlast,  n2s_tlast}),
    .i_tvalid ({t2s_tvalid, w2s_tvalid, e2s_tvalid, n2s_tvalid}),
    .i_tready ({t2s_tready, w2s_tready, e2s_tready, n2s_tready}),
    .o_tdata  (m_axis_sou_tdata),
    .o_tlast  (m_axis_sou_tlast),
    .o_tvalid (m_axis_sou_tvalid),
    .o_tready (m_axis_sou_tready)
  );
  assign m_axis_sou_tdest = 1'b0;

endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
new file mode 100644
index 000000000..65cded545
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
@@ -0,0 +1,398 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: mesh_2d_dor_router_single_sw
+// Description:
+// This module implements the router for a 2-dimensional (2d)
+// mesh network that uses dimension order routing (dor) and has a
+// single underlying switch (single_sw). It uses AXI-Stream for all of its
+// links.
+// The mesh topology, routing algorithms and the router architecture are
+// described in README.pdf in this directory.
+// Parameters:
+// - WIDTH: Width of the AXI-Stream data bus
+// - DIM_SIZE: Number of routers along one dimension
+// - XB_ADDR_X: The X-coordinate of this router in the topology
+// - XB_ADDR_Y: The Y-coordinate of this router in the topology
+// - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+// - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+// * WORMHOLE: Allocate route as soon as first word in pkt arrives
+// * CUT-THROUGH: Allocate route only after the full pkt arrives
+// - SWITCH_ALLOC: Algorithm to allocate the switch
+// * PRIO: Priority based. Priority: South > East > North > West > Term
+// * ROUND-ROBIN: Round robin input port allocation
+// Signals:
+// - *_axis_ter_*: Terminal ports (master/slave)
+// - *_axis_wst_*: Inter-router X-dim west connections (master/slave)
+// - *_axis_est_*: Inter-router X-dim east connections (master/slave)
+// - *_axis_nor_*: Inter-router Y-dim north connections (master/slave)
+// - *_axis_sou_*: Inter-router Y-dim south connections (master/slave)
+//
+
+module mesh_2d_dor_router_single_sw #(
+ parameter WIDTH = 64,
+ parameter DIM_SIZE = 4,
+ // Coordinates of this router; each is $clog2(DIM_SIZE) bits wide
+ parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0,
+ parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0,
+ parameter TERM_BUFF_SIZE = 5,
+ parameter XB_BUFF_SIZE = 5,
+ parameter ROUTING_ALLOC = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}
+ parameter SWITCH_ALLOC = "PRIO" // Switch allocation algorithm {ROUND-ROBIN, PRIO}
+) (
+ // Clocks and resets
+ input wire clk,
+ input wire reset,
+
+ // Terminal connections
+ // (the terminal port is the only port without a tdest signal)
+ input wire [WIDTH-1:0] s_axis_ter_tdata,
+ input wire s_axis_ter_tlast,
+ input wire s_axis_ter_tvalid,
+ output wire s_axis_ter_tready,
+ output wire [WIDTH-1:0] m_axis_ter_tdata,
+ output wire m_axis_ter_tlast,
+ output wire m_axis_ter_tvalid,
+ input wire m_axis_ter_tready,
+
+ // NOTE(review): the 1-bit tdest on each inter-router link below presumably
+ // carries the virtual channel (VC) number. The s_axis_*_tdest inputs feed the
+ // ingress VC buffers and the m_axis_*_tdest outputs are driven by the switch;
+ // confirm against axis_ingress_vc_buff and axis_switch.
+
+ // West inter-router connections
+ input wire [WIDTH-1:0] s_axis_wst_tdata,
+ input wire [0:0] s_axis_wst_tdest,
+ input wire s_axis_wst_tlast,
+ input wire s_axis_wst_tvalid,
+ output wire s_axis_wst_tready,
+ output wire [WIDTH-1:0] m_axis_wst_tdata,
+ output wire [0:0] m_axis_wst_tdest,
+ output wire m_axis_wst_tlast,
+ output wire m_axis_wst_tvalid,
+ input wire m_axis_wst_tready,
+
+ // East inter-router connections
+ input wire [WIDTH-1:0] s_axis_est_tdata,
+ input wire [0:0] s_axis_est_tdest,
+ input wire s_axis_est_tlast,
+ input wire s_axis_est_tvalid,
+ output wire s_axis_est_tready,
+ output wire [WIDTH-1:0] m_axis_est_tdata,
+ output wire [0:0] m_axis_est_tdest,
+ output wire m_axis_est_tlast,
+ output wire m_axis_est_tvalid,
+ input wire m_axis_est_tready,
+
+ // North inter-router connections
+ input wire [WIDTH-1:0] s_axis_nor_tdata,
+ input wire [0:0] s_axis_nor_tdest,
+ input wire s_axis_nor_tlast,
+ input wire s_axis_nor_tvalid,
+ output wire s_axis_nor_tready,
+ output wire [WIDTH-1:0] m_axis_nor_tdata,
+ output wire [0:0] m_axis_nor_tdest,
+ output wire m_axis_nor_tlast,
+ output wire m_axis_nor_tvalid,
+ input wire m_axis_nor_tready,
+
+ // South inter-router connections
+ input wire [WIDTH-1:0] s_axis_sou_tdata,
+ input wire [0:0] s_axis_sou_tdest,
+ input wire s_axis_sou_tlast,
+ input wire s_axis_sou_tvalid,
+ output wire s_axis_sou_tready,
+ output wire [WIDTH-1:0] m_axis_sou_tdata,
+ output wire [0:0] m_axis_sou_tdest,
+ output wire m_axis_sou_tlast,
+ output wire m_axis_sou_tvalid,
+ input wire m_axis_sou_tready
+);
+ // -------------------------------------------------
+ // Routing functions
+ // -------------------------------------------------
+
+ // mesh_node_mapping.vh file contains the mapping between the node number
+ // and its XY coordinates. It is autogenerated and defines the node_to_xdst
+ // and node_to_ydst functions.
+ `include "mesh_node_mapping.vh"
+
+ // Switch port numbers. These values are also used as bit positions in the
+ // 5-bit per-port vectors in this module (e.g. pkt_waiting[SW_DEST_SOU]),
+ // which are built as {sou, nor, est, wst, ter}, so bit 0 is the terminal
+ // and bit 4 is south.
+ localparam [2:0] SW_DEST_TER = 3'd0;
+ localparam [2:0] SW_DEST_WST = 3'd1;
+ localparam [2:0] SW_DEST_EST = 3'd2;
+ localparam [2:0] SW_DEST_NOR = 3'd3;
+ localparam [2:0] SW_DEST_SOU = 3'd4;
+ // Number of switch ports; used as the modulus for round-robin allocation
+ localparam [2:0] SW_NUM_DESTS = 3'd5;
+
+ // The compute_switch_tdest function is the destination selector
+ // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the
+ // first word of a packet and determine the destination of the packet.
+ //
+ // Inputs:
+ // - header: Word to decode (passed to node_to_xdst / node_to_ydst)
+ // - src: Switch port (SW_DEST_*) the packet arrived on
+ // Returns: {VC, switch output port}. Routing is dimension ordered: the
+ // X offset is resolved first (east/west), then the Y offset (north/south),
+ // and the terminal is selected when both offsets are zero.
+ function [3:0] compute_switch_tdest;
+ input [WIDTH-1:0] header;
+ input [3:0] src;
+ reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
+ // One bit wider than the coordinates and signed, so that the sign of
+ // the difference gives the direction of travel
+ reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
+ begin
+ xdst = node_to_xdst(header);
+ ydst = node_to_ydst(header);
+ xdiff = xdst - XB_ADDR_X;
+ ydiff = ydst - XB_ADDR_Y;
+ // Routing logic
+ // - MSB is the VC, 3 LSBs are the router destination
+ // - VC is 0 for every route except the X-to-Y turns flagged below
+ if (xdiff == 'd0 && ydiff == 'd0) begin
+ // VC=0 because terminals don't have VCs
+ compute_switch_tdest = {1'b0, SW_DEST_TER};
+ end else if (xdiff == 'd0) begin
+ // VC=1 for CCW turns and VC=0 for everything else
+ // (CCW turns here: arrived on the west port and leaving north, or
+ // arrived on the east port and leaving south)
+ if (ydiff < 0)
+ compute_switch_tdest = {(src == SW_DEST_WST), SW_DEST_NOR};
+ else
+ compute_switch_tdest = {(src == SW_DEST_EST), SW_DEST_SOU};
+ end else begin
+ // VC=0 because east-west paths don't have VCs
+ if (xdiff < 0)
+ compute_switch_tdest = {1'b0, SW_DEST_WST};
+ else
+ compute_switch_tdest = {1'b0, SW_DEST_EST};
+ end
+ // Simulation-only sanity checks: report a route that points off the
+ // edge of the mesh.
+ // NOTE(review): "!=" against 'hx yields x under 4-state evaluation, so
+ // this condition is never true and the messages below never print. A
+ // known-value test would need "!==" (e.g. ^{xdst, ydst} !== 1'bx).
+ // NOTE(review): the comparisons below match the full 4-bit result
+ // (including the VC bit) against the 3-bit port constants, so a
+ // north/south route with VC=1 would not match; [2:0] was probably
+ // intended.
+ // NOTE(review): this function is evaluated for every word on the buffer
+ // outputs, not just packet headers, so enabling these checks as-is may
+ // print for payload words; confirm before changing.
+ if (xdst != 'hx && ydst != 'hx) begin
+ if (XB_ADDR_X == 0 && compute_switch_tdest == SW_DEST_WST)
+ $display("Illegal route chosen: WEST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+ if (XB_ADDR_X == DIM_SIZE-1 && compute_switch_tdest == SW_DEST_EST)
+ $display("Illegal route chosen: EAST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+ if (XB_ADDR_Y == 0 && compute_switch_tdest == SW_DEST_NOR)
+ $display("Illegal route chosen: NORTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+ if (XB_ADDR_Y == DIM_SIZE-1 && compute_switch_tdest == SW_DEST_SOU)
+ $display("Illegal route chosen: SOUTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+ end
+ //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest);
+ end
+ endfunction
+
+  // The compute_switch_alloc function is the switch allocation function
+  // i.e. it chooses which input port reserves the switch for packet transfer.
+  // After the switch is allocated, all other ports will be backpressured until
+  // the packet finishes transferring.
+  //
+  // Inputs:
+  //  - pkt_waiting: One bit per input port, indexed by SW_DEST_*; a set bit
+  //                 marks a port that is requesting the switch
+  //  - last_alloc:  Port (SW_DEST_*) used as the round-robin starting point;
+  //                 ignored when SWITCH_ALLOC is "PRIO"
+  // Returns: The port (SW_DEST_*) to grant the switch to.
+  //  - No request or exactly one request: that port (SW_DEST_TER if none)
+  //  - Several requests, SWITCH_ALLOC == "PRIO": fixed priority
+  //  - Several requests, any other SWITCH_ALLOC: round-robin, starting with
+  //    the port after last_alloc
+  function [2:0] compute_switch_alloc;
+    input [4:0] pkt_waiting;
+    input [2:0] last_alloc;
+    // Round-robin candidates, i.e. (last_alloc + k) mod SW_NUM_DESTS for
+    // k = 1..4. The sum can reach 8, which does not fit in 3 bits, so the
+    // arithmetic is carried out in 4 bits. With 3-bit operands the k = 4
+    // term wrapped to 0 for last_alloc = 4, which skipped port 3 and let the
+    // south port be re-granted indefinitely while north was waiting.
+    reg [3:0] rr_next1, rr_next2, rr_next3, rr_next4;
+    begin
+      rr_next1 = ({1'b0, last_alloc} + 4'd1) % {1'b0, SW_NUM_DESTS};
+      rr_next2 = ({1'b0, last_alloc} + 4'd2) % {1'b0, SW_NUM_DESTS};
+      rr_next3 = ({1'b0, last_alloc} + 4'd3) % {1'b0, SW_NUM_DESTS};
+      rr_next4 = ({1'b0, last_alloc} + 4'd4) % {1'b0, SW_NUM_DESTS};
+
+      // Fast paths: nothing waiting, or a single requester (no arbitration)
+      if (pkt_waiting == 5'b00000) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00001) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00010) begin
+        compute_switch_alloc = SW_DEST_WST;
+      end else if (pkt_waiting == 5'b00100) begin
+        compute_switch_alloc = SW_DEST_EST;
+      end else if (pkt_waiting == 5'b01000) begin
+        compute_switch_alloc = SW_DEST_NOR;
+      end else if (pkt_waiting == 5'b10000) begin
+        compute_switch_alloc = SW_DEST_SOU;
+      end else begin
+        if (SWITCH_ALLOC == "PRIO") begin
+          // Priority: South > East > North > West > Term
+          if (pkt_waiting[SW_DEST_SOU])
+            compute_switch_alloc = SW_DEST_SOU;
+          else if (pkt_waiting[SW_DEST_EST])
+            compute_switch_alloc = SW_DEST_EST;
+          else if (pkt_waiting[SW_DEST_NOR])
+            compute_switch_alloc = SW_DEST_NOR;
+          else if (pkt_waiting[SW_DEST_WST])
+            compute_switch_alloc = SW_DEST_WST;
+          else
+            compute_switch_alloc = SW_DEST_TER;
+        end else begin
+          // Round-robin: grant the first requester found when scanning the
+          // ports in increasing order (with wrap-around) after last_alloc.
+          // last_alloc itself is the fallback when no other port is waiting.
+          if (pkt_waiting[rr_next1])
+            compute_switch_alloc = rr_next1[2:0];
+          else if (pkt_waiting[rr_next2])
+            compute_switch_alloc = rr_next2[2:0];
+          else if (pkt_waiting[rr_next3])
+            compute_switch_alloc = rr_next3[2:0];
+          else if (pkt_waiting[rr_next4])
+            compute_switch_alloc = rr_next4[2:0];
+          else
+            compute_switch_alloc = last_alloc;
+        end
+      end
+      //$display("pkt_waiting=%b, alloc=%d, last_alloc=%d", pkt_waiting, compute_switch_alloc, last_alloc);
+    end
+  endfunction
+
+ // -------------------------------------------------
+ // Input buffers
+ // -------------------------------------------------
+ // Terminal ingress stream after the packet gate, plus its computed
+ // {VC, output port} destination
+ wire [WIDTH-1:0] ter_i_tdata;
+ wire [3:0] ter_i_tdest;
+ wire ter_i_tlast;
+ wire ter_i_tvalid;
+ wire ter_i_tready;
+
+ // Data coming in from the terminal is gated until a full packet arrives
+ // in order to minimize the switch allocation time per packet.
+ // The gate is sized by TERM_BUFF_SIZE; its clear and error inputs are tied
+ // off.
+ axi_packet_gate #(
+ .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+ ) term_in_pkt_gate_i (
+ .clk (clk),
+ .reset (reset),
+ .clear (1'b0),
+ .i_tdata (s_axis_ter_tdata),
+ .i_tlast (s_axis_ter_tlast),
+ .i_tvalid (s_axis_ter_tvalid),
+ .i_tready (s_axis_ter_tready),
+ .i_terror (1'b0),
+ .o_tdata (ter_i_tdata),
+ .o_tlast (ter_i_tlast),
+ .o_tvalid (ter_i_tvalid),
+ .o_tready (ter_i_tready)
+ );
+ // Destination is derived combinationally from whatever word is at the gate
+ // output. NOTE(review): presumably axis_switch only uses tdest on the first
+ // word of a packet; confirm.
+ assign ter_i_tdest = compute_switch_tdest(ter_i_tdata, SW_DEST_TER);
+
+ // Inter-router ingress streams after buffering, plus their computed
+ // {VC, output port} destinations (one set per direction)
+ wire [WIDTH-1:0] wst_i_tdata, est_i_tdata, nor_i_tdata, sou_i_tdata;
+ wire [3:0] wst_i_tdest, est_i_tdest, nor_i_tdest, sou_i_tdest;
+ wire wst_i_tlast, est_i_tlast, nor_i_tlast, sou_i_tlast;
+ wire wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
+ wire wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;
+
+ // West ingress buffer (single VC). The buffer has no tdest output here;
+ // the switch destination is recomputed from the data word instead.
+ axis_ingress_vc_buff #(
+ .WIDTH(WIDTH), .NUM_VCS(1),
+ .SIZE(XB_BUFF_SIZE),
+ .ROUTING(ROUTING_ALLOC)
+ ) wst_in_vc_buf_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (s_axis_wst_tdata),
+ .s_axis_tdest (s_axis_wst_tdest),
+ .s_axis_tlast (s_axis_wst_tlast),
+ .s_axis_tvalid (s_axis_wst_tvalid),
+ .s_axis_tready (s_axis_wst_tready),
+ .m_axis_tdata (wst_i_tdata),
+ .m_axis_tlast (wst_i_tlast),
+ .m_axis_tvalid (wst_i_tvalid),
+ .m_axis_tready (wst_i_tready)
+ );
+ assign wst_i_tdest = compute_switch_tdest(wst_i_tdata, SW_DEST_WST);
+
+ // East ingress buffer (single VC)
+ axis_ingress_vc_buff #(
+ .WIDTH(WIDTH), .NUM_VCS(1),
+ .SIZE(XB_BUFF_SIZE),
+ .ROUTING(ROUTING_ALLOC)
+ ) est_in_vc_buf_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (s_axis_est_tdata),
+ .s_axis_tdest (s_axis_est_tdest),
+ .s_axis_tlast (s_axis_est_tlast),
+ .s_axis_tvalid (s_axis_est_tvalid),
+ .s_axis_tready (s_axis_est_tready),
+ .m_axis_tdata (est_i_tdata),
+ .m_axis_tlast (est_i_tlast),
+ .m_axis_tvalid (est_i_tvalid),
+ .m_axis_tready (est_i_tready)
+ );
+ assign est_i_tdest = compute_switch_tdest(est_i_tdata, SW_DEST_EST);
+
+ // North ingress buffer (two VCs)
+ axis_ingress_vc_buff #(
+ .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+ .SIZE(XB_BUFF_SIZE),
+ .ROUTING(ROUTING_ALLOC)
+ ) nor_in_vc_buf_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (s_axis_nor_tdata),
+ .s_axis_tdest (s_axis_nor_tdest),
+ .s_axis_tlast (s_axis_nor_tlast),
+ .s_axis_tvalid (s_axis_nor_tvalid),
+ .s_axis_tready (s_axis_nor_tready),
+ .m_axis_tdata (nor_i_tdata),
+ .m_axis_tlast (nor_i_tlast),
+ .m_axis_tvalid (nor_i_tvalid),
+ .m_axis_tready (nor_i_tready)
+ );
+ assign nor_i_tdest = compute_switch_tdest(nor_i_tdata, SW_DEST_NOR);
+
+ // South ingress buffer (two VCs)
+ axis_ingress_vc_buff #(
+ .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+ .SIZE(XB_BUFF_SIZE),
+ .ROUTING(ROUTING_ALLOC)
+ ) sou_in_vc_buf_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata (s_axis_sou_tdata),
+ .s_axis_tdest (s_axis_sou_tdest),
+ .s_axis_tlast (s_axis_sou_tlast),
+ .s_axis_tvalid (s_axis_sou_tvalid),
+ .s_axis_tready (s_axis_sou_tready),
+ .m_axis_tdata (sou_i_tdata),
+ .m_axis_tlast (sou_i_tlast),
+ .m_axis_tvalid (sou_i_tvalid),
+ .m_axis_tready (sou_i_tready)
+ );
+ assign sou_i_tdest = compute_switch_tdest(sou_i_tdata, SW_DEST_SOU);
+
+ //-------------------------------------------------
+ // Switch
+ //-------------------------------------------------
+ // Track the input packet state
+ // HEAD: the next word transferred is the first word of a packet
+ // BODY: a packet is in flight (first word transferred, last word not yet)
+ localparam [0:0] PKT_ST_HEAD = 1'b0;
+ localparam [0:0] PKT_ST_BODY = 1'b1;
+ reg [0:0] pkt_state = PKT_ST_HEAD;
+
+ // The switch only accept packets on a single port at a time.
+ // NOTE(review): these three signals are OR-reductions over all five input
+ // ports, not just the port that currently holds the switch. In particular
+ // sw_in_last is asserted by any port presenting tvalid & tlast, e.g. a port
+ // that is merely waiting with a single-word packet while another port is
+ // mid-packet. Confirm that this cannot return pkt_state to HEAD (and so
+ // re-arbitrate) before the in-flight packet ends; qualifying the signals
+ // with switch_alloc may be needed.
+ wire sw_in_ready = |({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready});
+ wire sw_in_valid = |({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid});
+ wire sw_in_last = |({sou_i_tlast & sou_i_tvalid, nor_i_tlast & nor_i_tvalid,
+ est_i_tlast & est_i_tvalid, wst_i_tlast & wst_i_tvalid,
+ ter_i_tlast & ter_i_tvalid});
+
+ // Advance the packet state on every transferred word: back to HEAD after
+ // a last word, BODY otherwise
+ always @(posedge clk) begin
+ if (reset) begin
+ pkt_state <= PKT_ST_HEAD;
+ end else if (sw_in_valid & sw_in_ready) begin
+ pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
+ end
+ end
+
+ // The switch requires the allocation to stay valid until the
+ // end of the packet. We also might need to keep the previous
+ // packet's allocation to compute the current one
+ wire [2:0] switch_alloc;
+ reg [2:0] prev_switch_alloc = SW_DEST_TER;
+ reg [2:0] pkt_switch_alloc = SW_DEST_TER;
+
+ // pkt_switch_alloc: allocation captured when the first word of a packet is
+ // transferred; held for the remaining words.
+ // prev_switch_alloc: allocation captured when a last word is transferred;
+ // passed to compute_switch_alloc as last_alloc.
+ always @(posedge clk) begin
+ if (reset) begin
+ prev_switch_alloc <= SW_DEST_TER;
+ pkt_switch_alloc <= SW_DEST_TER;
+ end else if (sw_in_valid & sw_in_ready) begin
+ if (pkt_state == PKT_ST_HEAD)
+ pkt_switch_alloc <= switch_alloc;
+ if (sw_in_last)
+ prev_switch_alloc <= switch_alloc;
+ end
+ end
+
+ // Arbitrate combinationally while waiting for a packet head with at least
+ // one port valid; otherwise present the registered allocation.
+ assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
+ compute_switch_alloc({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}, prev_switch_alloc) :
+ pkt_switch_alloc;
+
+ // The terminal output has no tdest port on this module, so the switch's
+ // tdest output for port 0 is left unused.
+ wire ter_tdest_discard;
+ // Single 5x5 switch shared by all ports. Every port vector is ordered
+ // {sou, nor, est, wst, ter}, matching the SW_DEST_* numbering (ter = 0).
+ // Each input supplies a 4-bit tdest ({VC, output port}) and each output
+ // returns a 1-bit tdest.
+ // NOTE(review): presumably axis_switch consumes the 3 port bits for routing
+ // and forwards the remaining DEST_W bit(s); confirm against axis_switch.
+ axis_switch #(
+ .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(5), .OUT_PORTS(5)
+ ) switch_i (
+ .clk (clk),
+ .reset (reset),
+ .s_axis_tdata ({sou_i_tdata , nor_i_tdata , est_i_tdata , wst_i_tdata , ter_i_tdata }),
+ .s_axis_tdest ({sou_i_tdest , nor_i_tdest , est_i_tdest , wst_i_tdest , ter_i_tdest }),
+ .s_axis_tlast ({sou_i_tlast , nor_i_tlast , est_i_tlast , wst_i_tlast , ter_i_tlast }),
+ .s_axis_tvalid ({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}),
+ .s_axis_tready ({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready}),
+ .s_axis_alloc (switch_alloc),
+ .m_axis_tdata ({m_axis_sou_tdata, m_axis_nor_tdata, m_axis_est_tdata, m_axis_wst_tdata, m_axis_ter_tdata }),
+ .m_axis_tdest ({m_axis_sou_tdest, m_axis_nor_tdest, m_axis_est_tdest, m_axis_wst_tdest, ter_tdest_discard}),
+ .m_axis_tlast ({m_axis_sou_tlast, m_axis_nor_tlast, m_axis_est_tlast, m_axis_wst_tlast, m_axis_ter_tlast }),
+ .m_axis_tvalid ({m_axis_sou_tvalid, m_axis_nor_tvalid, m_axis_est_tvalid, m_axis_wst_tvalid, m_axis_ter_tvalid}),
+ .m_axis_tready ({m_axis_sou_tready, m_axis_nor_tready, m_axis_est_tready, m_axis_wst_tready, m_axis_ter_tready})
+ );
+
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
new file mode 100644
index 000000000..466b0c615
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
@@ -0,0 +1,294 @@
+// Copyright 2018 Ettus Research, A National Instruments Company
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Autogenerated file. Do not modify.
+// $ ./gen_node_to_coord_mapping.py --dimsize 16 --pattern spiral
+
+parameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround
+
+
+// DIM_SIZE = 2, PATTERN = SPIRAL
+//------------------------------------
+// 0 1
+// 3 2
+localparam [3:0] XCOORD_DIM_002 = {1'd0,1'd1,1'd1,1'd0};
+localparam [3:0] YCOORD_DIM_002 = {1'd1,1'd1,1'd0,1'd0};
+
+// DIM_SIZE = 3, PATTERN = SPIRAL
+//------------------------------------
+// 6 7 8
+// 5 0 1
+// 4 3 2
+localparam [17:0] XCOORD_DIM_003 = {2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1};
+localparam [17:0] YCOORD_DIM_003 = {2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1};
+
+// DIM_SIZE = 4, PATTERN = SPIRAL
+//------------------------------------
+// 6 7 8 9
+// 5 0 1 10
+// 4 3 2 11
+// 15 14 13 12
+localparam [31:0] XCOORD_DIM_004 = {2'd0,2'd1,2'd2,2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1};
+localparam [31:0] YCOORD_DIM_004 = {2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1};
+
+// DIM_SIZE = 5, PATTERN = SPIRAL
+//------------------------------------
+// 20 21 22 23 24
+// 19 6 7 8 9
+// 18 5 0 1 10
+// 17 4 3 2 11
+// 16 15 14 13 12
+localparam [74:0] XCOORD_DIM_005 = {3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2};
+localparam [74:0] YCOORD_DIM_005 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2};
+
+// DIM_SIZE = 6, PATTERN = SPIRAL
+//------------------------------------
+// 20 21 22 23 24 25
+// 19 6 7 8 9 26
+// 18 5 0 1 10 27
+// 17 4 3 2 11 28
+// 16 15 14 13 12 29
+// 35 34 33 32 31 30
+localparam [107:0] XCOORD_DIM_006 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2};
+localparam [107:0] YCOORD_DIM_006 = {3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2};
+
+// DIM_SIZE = 7, PATTERN = SPIRAL
+//------------------------------------
+// 42 43 44 45 46 47 48
+// 41 20 21 22 23 24 25
+// 40 19 6 7 8 9 26
+// 39 18 5 0 1 10 27
+// 38 17 4 3 2 11 28
+// 37 16 15 14 13 12 29
+// 36 35 34 33 32 31 30
+localparam [146:0] XCOORD_DIM_007 = {3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3};
+localparam [146:0] YCOORD_DIM_007 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3};
+
+// DIM_SIZE = 8, PATTERN = SPIRAL
+//------------------------------------
+// 42 43 44 45 46 47 48 49
+// 41 20 21 22 23 24 25 50
+// 40 19 6 7 8 9 26 51
+// 39 18 5 0 1 10 27 52
+// 38 17 4 3 2 11 28 53
+// 37 16 15 14 13 12 29 54
+// 36 35 34 33 32 31 30 55
+// 63 62 61 60 59 58 57 56
+localparam [191:0] XCOORD_DIM_008 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3};
+localparam [191:0] YCOORD_DIM_008 = {3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3};
+
+// DIM_SIZE = 9, PATTERN = SPIRAL
+//------------------------------------
+// 72 73 74 75 76 77 78 79 80
+// 71 42 43 44 45 46 47 48 49
+// 70 41 20 21 22 23 24 25 50
+// 69 40 19 6 7 8 9 26 51
+// 68 39 18 5 0 1 10 27 52
+// 67 38 17 4 3 2 11 28 53
+// 66 37 16 15 14 13 12 29 54
+// 65 36 35 34 33 32 31 30 55
+// 64 63 62 61 60 59 58 57 56
+localparam [323:0] XCOORD_DIM_009 = {4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4};
+localparam [323:0] YCOORD_DIM_009 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4};
+
+// DIM_SIZE = 10, PATTERN = SPIRAL
+//------------------------------------
+// 72 73 74 75 76 77 78 79 80 81
+// 71 42 43 44 45 46 47 48 49 82
+// 70 41 20 21 22 23 24 25 50 83
+// 69 40 19 6 7 8 9 26 51 84
+// 68 39 18 5 0 1 10 27 52 85
+// 67 38 17 4 3 2 11 28 53 86
+// 66 37 16 15 14 13 12 29 54 87
+// 65 36 35 34 33 32 31 30 55 88
+// 64 63 62 61 60 59 58 57 56 89
+// 99 98 97 96 95 94 93 92 91 90
+localparam [399:0] XCOORD_DIM_010 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4};
+localparam [399:0] YCOORD_DIM_010 = {4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4};
+
+// DIM_SIZE = 11, PATTERN = SPIRAL
+//------------------------------------
+// 110 111 112 113 114 115 116 117 118 119 120
+// 109 72 73 74 75 76 77 78 79 80 81
+// 108 71 42 43 44 45 46 47 48 49 82
+// 107 70 41 20 21 22 23 24 25 50 83
+// 106 69 40 19 6 7 8 9 26 51 84
+// 105 68 39 18 5 0 1 10 27 52 85
+// 104 67 38 17 4 3 2 11 28 53 86
+// 103 66 37 16 15 14 13 12 29 54 87
+// 102 65 36 35 34 33 32 31 30 55 88
+// 101 64 63 62 61 60 59 58 57 56 89
+// 100 99 98 97 96 95 94 93 92 91 90
+localparam [483:0] XCOORD_DIM_011 = {4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5};
+localparam [483:0] YCOORD_DIM_011 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5};
+
+// DIM_SIZE = 12, PATTERN = SPIRAL
+//------------------------------------
+// 110 111 112 113 114 115 116 117 118 119 120 121
+// 109 72 73 74 75 76 77 78 79 80 81 122
+// 108 71 42 43 44 45 46 47 48 49 82 123
+// 107 70 41 20 21 22 23 24 25 50 83 124
+// 106 69 40 19 6 7 8 9 26 51 84 125
+// 105 68 39 18 5 0 1 10 27 52 85 126
+// 104 67 38 17 4 3 2 11 28 53 86 127
+// 103 66 37 16 15 14 13 12 29 54 87 128
+// 102 65 36 35 34 33 32 31 30 55 88 129
+// 101 64 63 62 61 60 59 58 57 56 89 130
+// 100 99 98 97 96 95 94 93 92 91 90 131
+// 143 142 141 140 139 138 137 136 135 134 133 132
+localparam [575:0] XCOORD_DIM_012 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5};
+localparam [575:0] YCOORD_DIM_012 = {4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5};
+
+// DIM_SIZE = 13, PATTERN = SPIRAL
+//------------------------------------
+// 156 157 158 159 160 161 162 163 164 165 166 167 168
+// 155 110 111 112 113 114 115 116 117 118 119 120 121
+// 154 109 72 73 74 75 76 77 78 79 80 81 122
+// 153 108 71 42 43 44 45 46 47 48 49 82 123
+// 152 107 70 41 20 21 22 23 24 25 50 83 124
+// 151 106 69 40 19 6 7 8 9 26 51 84 125
+// 150 105 68 39 18 5 0 1 10 27 52 85 126
+// 149 104 67 38 17 4 3 2 11 28 53 86 127
+// 148 103 66 37 16 15 14 13 12 29 54 87 128
+// 147 102 65 36 35 34 33 32 31 30 55 88 129
+// 146 101 64 63 62 61 60 59 58 57 56 89 130
+// 145 100 99 98 97 96 95 94 93 92 91 90 131
+// 144 143 142 141 140 139 138 137 136 135 134 133 132
+localparam [675:0] XCOORD_DIM_013 = {4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6};
+localparam [675:0] YCOORD_DIM_013 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6};
+
+// DIM_SIZE = 14, PATTERN = SPIRAL
+//------------------------------------
+// 156 157 158 159 160 161 162 163 164 165 166 167 168 169
+// 155 110 111 112 113 114 115 116 117 118 119 120 121 170
+// 154 109 72 73 74 75 76 77 78 79 80 81 122 171
+// 153 108 71 42 43 44 45 46 47 48 49 82 123 172
+// 152 107 70 41 20 21 22 23 24 25 50 83 124 173
+// 151 106 69 40 19 6 7 8 9 26 51 84 125 174
+// 150 105 68 39 18 5 0 1 10 27 52 85 126 175
+// 149 104 67 38 17 4 3 2 11 28 53 86 127 176
+// 148 103 66 37 16 15 14 13 12 29 54 87 128 177
+// 147 102 65 36 35 34 33 32 31 30 55 88 129 178
+// 146 101 64 63 62 61 60 59 58 57 56 89 130 179
+// 145 100 99 98 97 96 95 94 93 92 91 90 131 180
+// 144 143 142 141 140 139 138 137 136 135 134 133 132 181
+// 195 194 193 192 191 190 189 188 187 186 185 184 183 182
+localparam [783:0] XCOORD_DIM_014 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6};
+localparam [783:0] YCOORD_DIM_014 = {4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6};
+
+// DIM_SIZE = 15, PATTERN = SPIRAL
+//------------------------------------
+// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
+// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169
+// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170
+// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171
+// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172
+// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173
+// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174
+// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175
+// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176
+// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177
+// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178
+// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179
+// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180
+// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181
+// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182
+localparam [899:0] XCOORD_DIM_015 = {4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7};
+localparam [899:0] YCOORD_DIM_015 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7};
+
+// DIM_SIZE = 16, PATTERN = SPIRAL
+//------------------------------------
+// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
+// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 226
+// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 227
+// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 228
+// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 229
+// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 230
+// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 231
+// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 232
+// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 233
+// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 234
+// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 235
+// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 236
+// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 237
+// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 238
+// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 239
+// 255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240
+localparam [1023:0] XCOORD_DIM_016 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7};
+localparam [1023:0] YCOORD_DIM_016 = {4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7};
+
+
+// node_to_xdst: looks up the X coordinate for the node index held in the
+// low-order bits of 'header'. DIM_SIZE selects both the lookup table and the
+// number of header bits used as the index; the result is truncated or
+// extended to CLOG2_DIM_SIZE bits by the assignment. Any DIM_SIZE outside
+// 2..16 returns zero.
+function [CLOG2_DIM_SIZE-1:0] node_to_xdst;
+  input [WIDTH-1:0] header;
+begin
+  // DIM_SIZE is an elaboration-time constant, so only one arm is ever live
+  case (DIM_SIZE)
+    2:       node_to_xdst = XCOORD_DIM_002[1*header[1:0] +: 1];
+    3:       node_to_xdst = XCOORD_DIM_003[2*header[3:0] +: 2];
+    4:       node_to_xdst = XCOORD_DIM_004[2*header[3:0] +: 2];
+    5:       node_to_xdst = XCOORD_DIM_005[3*header[4:0] +: 3];
+    6:       node_to_xdst = XCOORD_DIM_006[3*header[5:0] +: 3];
+    7:       node_to_xdst = XCOORD_DIM_007[3*header[5:0] +: 3];
+    8:       node_to_xdst = XCOORD_DIM_008[3*header[5:0] +: 3];
+    9:       node_to_xdst = XCOORD_DIM_009[4*header[6:0] +: 4];
+    10:      node_to_xdst = XCOORD_DIM_010[4*header[6:0] +: 4];
+    11:      node_to_xdst = XCOORD_DIM_011[4*header[6:0] +: 4];
+    12:      node_to_xdst = XCOORD_DIM_012[4*header[7:0] +: 4];
+    13:      node_to_xdst = XCOORD_DIM_013[4*header[7:0] +: 4];
+    14:      node_to_xdst = XCOORD_DIM_014[4*header[7:0] +: 4];
+    15:      node_to_xdst = XCOORD_DIM_015[4*header[7:0] +: 4];
+    16:      node_to_xdst = XCOORD_DIM_016[4*header[7:0] +: 4];
+    default: node_to_xdst = {CLOG2_DIM_SIZE{1'd0}};
+  endcase
+end endfunction
+
+// node_to_ydst: looks up the Y coordinate for the node index held in the
+// low-order bits of 'header'. DIM_SIZE selects both the lookup table and the
+// number of header bits used as the index; the result is truncated or
+// extended to CLOG2_DIM_SIZE bits by the assignment. Any DIM_SIZE outside
+// 2..16 returns zero.
+function [CLOG2_DIM_SIZE-1:0] node_to_ydst;
+  input [WIDTH-1:0] header;
+begin
+  // DIM_SIZE is an elaboration-time constant, so only one arm is ever live
+  case (DIM_SIZE)
+    2:       node_to_ydst = YCOORD_DIM_002[1*header[1:0] +: 1];
+    3:       node_to_ydst = YCOORD_DIM_003[2*header[3:0] +: 2];
+    4:       node_to_ydst = YCOORD_DIM_004[2*header[3:0] +: 2];
+    5:       node_to_ydst = YCOORD_DIM_005[3*header[4:0] +: 3];
+    6:       node_to_ydst = YCOORD_DIM_006[3*header[5:0] +: 3];
+    7:       node_to_ydst = YCOORD_DIM_007[3*header[5:0] +: 3];
+    8:       node_to_ydst = YCOORD_DIM_008[3*header[5:0] +: 3];
+    9:       node_to_ydst = YCOORD_DIM_009[4*header[6:0] +: 4];
+    10:      node_to_ydst = YCOORD_DIM_010[4*header[6:0] +: 4];
+    11:      node_to_ydst = YCOORD_DIM_011[4*header[6:0] +: 4];
+    12:      node_to_ydst = YCOORD_DIM_012[4*header[7:0] +: 4];
+    13:      node_to_ydst = YCOORD_DIM_013[4*header[7:0] +: 4];
+    14:      node_to_ydst = YCOORD_DIM_014[4*header[7:0] +: 4];
+    15:      node_to_ydst = YCOORD_DIM_015[4*header[7:0] +: 4];
+    16:      node_to_ydst = YCOORD_DIM_016[4*header[7:0] +: 4];
+    default: node_to_ydst = {CLOG2_DIM_SIZE{1'd0}};
+  endcase
+end endfunction
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl
new file mode 100644
index 000000000..39440b512
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl
@@ -0,0 +1,18 @@
+#
+# Vivado Tcl script (this is not a Python file): synthesizes
+# axis_ctrl_crossbar_nxn_top and writes a utilization/timing report plus a
+# design checkpoint. synth_run.py launches it with
+#   vivado -mode tcl -source axis_ctrl_crossbar_nxn_top.tcl
+# after generating axis_ctrl_crossbar_nxn_top.v from the .v.in template.
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+# Throw-away in-memory project
+create_project tmp_proj -part xc7k410tffg900-3 -in_memory
+# Generated top level plus the crossbar sources from the parent directory
+add_files {axis_ctrl_crossbar_nxn_top.v ../axis_ctrl_crossbar_nxn.v ../axis_ctrl_crossbar_2d_mesh.v ../mesh_2d_dor_router_multi_sw.v ../axis_switch.v ../axis_ingress_vc_buff.v ../mesh_node_mapping.vh ../mesh_2d_dor_router_single_sw.v ../torus_2d_dor_router_single_sw.v ../torus_2d_dor_router_multi_sw.v ../axis_port_terminator.v}
+# Shared FIFO / mux / map library modules
+add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v}
+set_property top axis_ctrl_crossbar_nxn_top [current_fileset]
+synth_design
+# The clock is defined only after synth_design; presumably it exists just so
+# that the timing summary below has a period to report against
+create_clock -name clk -period 2.0 [get_ports clk]
+# Both reports go to the same .rpt file (-append); synth_run.py parses it
+report_utilization -no_primitives -file axis_ctrl_crossbar_nxn.rpt
+report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file axis_ctrl_crossbar_nxn.rpt
+write_checkpoint -force axis_ctrl_crossbar_nxn.dcp
+close_project
+exit \ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in
new file mode 100644
index 000000000..6805100b9
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in
@@ -0,0 +1,47 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+
+// Synthesis wrapper template for axis_ctrl_crossbar_nxn.
+// NOTE: synth_run.py expands this file with Python str.format, which replaces
+// the placeholders written in curly brackets. Do not add any other curly
+// brackets (Verilog concatenations included) to this template.
+module axis_ctrl_crossbar_nxn_top(
+  input wire clk,
+  input wire rst
+);
+  // Router global config (filled in by the generator script)
+  localparam IMPL    = "{top}";
+  localparam NPORTS  = {ports};
+  localparam DWIDTH  = {dataw};
+  localparam MTU     = {mtu};
+  localparam ROUTING = "{ralloc}";
+
+  // The stream buses are left unconnected on purpose; dont_touch is set on
+  // every one of them
+  (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata;
+  (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] m_axis_tdata;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tlast;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tlast;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tvalid;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tvalid;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tready;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tready;
+  (* dont_touch = "true"*) wire                       deadlock_detected;
+
+  // Device under test; MTU sizes both the ingress and the router buffers
+  axis_ctrl_crossbar_nxn #(
+    .WIDTH            (DWIDTH),
+    .NPORTS           (NPORTS),
+    .TOPOLOGY         (IMPL),
+    .INGRESS_BUFF_SIZE(MTU),
+    .ROUTER_BUFF_SIZE (MTU),
+    .ROUTING_ALLOC    (ROUTING),
+    .SWITCH_ALLOC     ("ROUND-ROBIN")
+  ) router_dut_i (
+    .clk              (clk),
+    .reset            (rst),
+    .s_axis_tdata     (s_axis_tdata),
+    .s_axis_tlast     (s_axis_tlast),
+    .s_axis_tvalid    (s_axis_tvalid),
+    .s_axis_tready    (s_axis_tready),
+    .m_axis_tdata     (m_axis_tdata),
+    .m_axis_tlast     (m_axis_tlast),
+    .m_axis_tvalid    (m_axis_tvalid),
+    .m_axis_tready    (m_axis_tready),
+    .deadlock_detected(deadlock_detected)
+  );
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl
new file mode 100644
index 000000000..304384aee
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl
@@ -0,0 +1,18 @@
+#
+# Vivado Tcl script (this is not a Python file): synthesizes
+# chdr_crossbar_nxn_top and writes a utilization/timing report plus a design
+# checkpoint. synth_run.py launches it with
+#   vivado -mode tcl -source chdr_crossbar_nxn_top.tcl
+# after generating chdr_crossbar_nxn_top.v from the .v.in template.
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+# Throw-away in-memory project
+create_project tmp_proj -part xc7k410tffg900-3 -in_memory
+# Generated top level plus the CHDR crossbar sources and core headers
+add_files {chdr_crossbar_nxn_top.v ../chdr_crossbar_nxn.v ../axis_switch.v ../chdr_xb_ingress_buff.v ../chdr_xb_routing_table.v ../../core/chdr_mgmt_pkt_handler.v ../../core/rfnoc_chdr_utils.vh ../../core/rfnoc_chdr_internal_utils.vh}
+# Shared FIFO / mux / map library modules
+add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v}
+set_property top chdr_crossbar_nxn_top [current_fileset]
+synth_design
+# The clock is defined only after synth_design; presumably it exists just so
+# that the timing summary below has a period to report against
+create_clock -name clk -period 2.0 [get_ports clk]
+# Both reports go to the same .rpt file (-append); synth_run.py parses it
+report_utilization -no_primitives -file chdr_crossbar_nxn.rpt
+report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file chdr_crossbar_nxn.rpt
+write_checkpoint -force chdr_crossbar_nxn.dcp
+close_project
+exit \ No newline at end of file
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in
new file mode 100644
index 000000000..fbf0852a3
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in
@@ -0,0 +1,55 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+
+// Synthesis wrapper template for chdr_crossbar_nxn.
+// NOTE: synth_run.py expands this file with Python str.format, which replaces
+// the placeholders written in curly brackets. Do not add any other curly
+// brackets (Verilog concatenations included) to this template.
+module chdr_crossbar_nxn_top(
+  input wire clk,
+  input wire rst
+);
+  // Router global config (filled in by the generator script)
+  localparam NPORTS    = {ports};
+  localparam DWIDTH    = {dataw};
+  localparam MTU       = {mtu};
+  localparam RLUT_SIZE = {rlutsize};
+  localparam OPTIMIZE  = "{opt}";
+
+  // The stream buses are left unconnected on purpose; dont_touch is set on
+  // every one of them
+  (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata;
+  (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] m_axis_tdata;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tlast;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tlast;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tvalid;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tvalid;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          s_axis_tready;
+  (* dont_touch = "true"*) wire [NPORTS-1:0]          m_axis_tready;
+
+  // Device under test
+  chdr_crossbar_nxn #(
+    .CHDR_W         (DWIDTH),
+    .NPORTS         (NPORTS),
+    .DEFAULT_PORT   (0),
+    .MTU            (MTU),
+    .ROUTE_TBL_SIZE (RLUT_SIZE),
+    .MUX_ALLOC      ("ROUND-ROBIN"),
+    .OPTIMIZE       (OPTIMIZE),
+    .NPORTS_MGMT    (NPORTS),
+    .EXT_RTCFG_PORT (1)
+  ) router_dut_i (
+    // General
+    .clk            (clk),
+    .reset          (rst),
+    // Inputs
+    .s_axis_tdata   (s_axis_tdata),
+    .s_axis_tlast   (s_axis_tlast),
+    .s_axis_tvalid  (s_axis_tvalid),
+    .s_axis_tready  (s_axis_tready),
+    // Output
+    .m_axis_tdata   (m_axis_tdata),
+    .m_axis_tlast   (m_axis_tlast),
+    .m_axis_tvalid  (m_axis_tvalid),
+    .m_axis_tready  (m_axis_tready),
+    // External rtcfg port: inputs tied to constant zero, ack left open
+    .ext_rtcfg_stb  (0),
+    .ext_rtcfg_addr (0),
+    .ext_rtcfg_data (0),
+    .ext_rtcfg_ack  ()
+  );
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py
new file mode 100755
index 000000000..4ca6e07fa
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py
@@ -0,0 +1,37 @@
+#! /usr/bin/python3
+#!/usr/bin/python3
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+import argparse
+import synth_run
+
+modname = 'axis_ctrl_crossbar_nxn'
+
+# Parse command line options
+def get_options():
+ parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname)
+ parser.add_argument('--top', type=str, default='TORUS', help='Topologies (CSV)')
+ parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)')
+ parser.add_argument('--dataw', type=str, default='32', help='Router datapath width (CSV)')
+ parser.add_argument('--mtu', type=str, default='5', help='MTU (CSV)')
+ parser.add_argument('--ralloc', type=str, default='WORMHOLE', help='Router allocation method (CSV)')
+ return parser.parse_args()
+
+def main():
+ args = get_options()
+ keys = ['top', 'ports', 'dataw', 'mtu', 'ralloc']
+ for top in args.top.strip().split(','):
+ for ports in args.ports.strip().split(','):
+ for dataw in args.dataw.strip().split(','):
+ for mtu in args.mtu.strip().split(','):
+ for ralloc in args.ralloc.strip().split(','):
+ # Collect parameters
+ transform = {'ports':ports, 'dataw':dataw, 'mtu':mtu, 'top':top, 'ralloc':ralloc}
+ synth_run.synth_run(modname, keys, transform)
+
+if __name__ == '__main__':
+ main()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py
new file mode 100755
index 000000000..668e7a247
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py
@@ -0,0 +1,37 @@
+#! /usr/bin/python3
+#!/usr/bin/python3
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+import argparse
+import synth_run
+
+modname = 'chdr_crossbar_nxn'
+
+# Parse command line options
+def get_options():
+ parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname)
+ parser.add_argument('--opt', type=str, default='AREA', help='Optimization strategies (CSV)')
+ parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)')
+ parser.add_argument('--dataw', type=str, default='64', help='Router datapath width (CSV)')
+ parser.add_argument('--mtu', type=str, default='10', help='MTU or Ingress buffer size (CSV)')
+ parser.add_argument('--rlutsize', type=str, default='6', help='Router lookup table size (CSV)')
+ return parser.parse_args()
+
+def main():
+ args = get_options()
+ keys = ['opt', 'ports', 'dataw', 'mtu', 'rlutsize']
+ for opt in args.opt.strip().split(','):
+ for ports in args.ports.strip().split(','):
+ for dataw in args.dataw.strip().split(','):
+ for mtu in args.mtu.strip().split(','):
+ for rlutsize in args.rlutsize.strip().split(','):
+ # Collect parameters
+ transform = {'opt':opt, 'ports':ports, 'dataw':dataw, 'mtu':mtu, 'rlutsize':rlutsize}
+ synth_run.synth_run(modname, keys, transform)
+
+if __name__ == '__main__':
+ main()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py
new file mode 100644
index 000000000..a9801ac20
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py
@@ -0,0 +1,67 @@
+#! /usr/bin/python3
+#!/usr/bin/python3
+#
+# Copyright 2018 Ettus Research, a National Instruments Company
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+
+import sys, os
+import subprocess
+import re
+
+def synth_run(modname, keys, transform):
+ prefix = modname + '_' + ('_'.join(['%s%s'%(k,transform[k]) for k in keys]))
+ print('='*(len(prefix)+2))
+ print(' %s '%(prefix))
+ print('='*(len(prefix)+2))
+ # Write Verilog top-level file
+ with open(modname + '_top.v.in', 'r') as in_file:
+ with open(modname + '_top.v', 'w') as out_file:
+ out_file.write(in_file.read().format(**transform))
+ # Run Vivado
+ exitcode = subprocess.Popen(
+ 'vivado -mode tcl -source %s_top.tcl -nolog -nojou'%(modname), shell=True
+ ).wait()
+ if exitcode != 0:
+ raise RuntimeError('Error running vivado. Was setupenv.sh run?')
+ # Extract info
+ lut = 100.0
+ reg = 100.0
+ bram = 100.0
+ dsp = 100.0
+ fmax = 0.0
+ with open(modname + '.rpt', 'r') as rpt_file:
+ rpt = rpt_file.readlines()
+ for line in rpt:
+ lm = re.match(r'.*Slice LUTs\*.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line)
+ if lm is not None:
+ lut = float(lm.group(1).strip())
+ rm = re.match(r'.*Slice Registers.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line)
+ if rm is not None:
+ reg = float(rm.group(1).strip())
+ bm = re.match(r'.*Block RAM Tile.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line)
+ if bm is not None:
+ bram = float(bm.group(1).strip())
+ dm = re.match(r'.*DSPs.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line)
+ if dm is not None:
+ dsp = float(dm.group(1).strip())
+ tm = re.match(r'.*clk.*\| clk\s*\|(.*)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|', line)
+ if tm is not None:
+ fmax = 1000.0/float(tm.group(1).strip())
+ # Save report
+ os.rename(modname + '.rpt', prefix + '.rpt')
+ os.rename(modname + '.dcp', prefix + '.dcp')
+ try:
+ os.remove(modname + '_top.v')
+ os.remove('fsm_encoding.os')
+ except FileNotFoundError:
+ pass
+ # Write summary report line
+ res_keys = ['lut','reg','bram','dsp','fmax']
+ res = {'lut':lut, 'reg':reg, 'bram':bram, 'dsp':dsp, 'fmax':fmax, 'prefix':prefix}
+ if not os.path.exists(modname + '_summary.csv'):
+ with open(modname + '_summary.csv', 'w') as summaryf:
+ summaryf.write((','.join(keys + res_keys)) + '\n')
+ with open(modname + '_summary.csv', 'a') as summaryf:
+ summaryf.write((','.join(['%s'%(transform[k]) for k in keys])) + ',' + (','.join(['%.1f'%(res[k]) for k in res_keys])) + '\n')
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v
new file mode 100644
index 000000000..cd70450a0
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v
@@ -0,0 +1,338 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: torus_2d_dor_router_multi_sw
+// Description:
+// Alternate implementation for torus_2d_dor_router_single_sw with
+// multiple switches for independent paths between inputs and outputs
+// **NOTE**: This module has not been validated
+
+module torus_2d_dor_router_multi_sw #(
+  parameter                        WIDTH          = 64,         // Width of every tdata bus
+  parameter                        DIM_SIZE       = 4,          // Sets the coordinate width ($clog2) and the node lookup table
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X      = 0,          // X coordinate this router compares destinations against
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y      = 0,          // Y coordinate this router compares destinations against
+  parameter                        TERM_BUFF_SIZE = 5,          // SIZE of the terminal ingress packet gate
+  parameter                        XB_BUFF_SIZE   = 5,          // SIZE of the X/Y ingress VC buffers
+  parameter                        ROUTING_ALLOC  = "WORMHOLE"  // Passed to the ingress VC buffers as ROUTING
+) (
+  // Clocks and resets
+  input  wire             clk,
+  input  wire             reset,
+
+  // Terminal connections
+  input  wire [WIDTH-1:0] s_axis_term_tdata,
+  input  wire             s_axis_term_tlast,
+  input  wire             s_axis_term_tvalid,
+  output wire             s_axis_term_tready,
+  output wire [WIDTH-1:0] m_axis_term_tdata,
+  output wire             m_axis_term_tlast,
+  output wire             m_axis_term_tvalid,
+  input  wire             m_axis_term_tready,
+
+  // X-dimension inter-XB connections
+  input  wire [WIDTH-1:0] s_axis_xdim_tdata,
+  input  wire [0:0]       s_axis_xdim_tdest,
+  input  wire             s_axis_xdim_tlast,
+  input  wire             s_axis_xdim_tvalid,
+  output wire             s_axis_xdim_tready,
+  output wire [WIDTH-1:0] m_axis_xdim_tdata,
+  output wire [0:0]       m_axis_xdim_tdest,
+  output wire             m_axis_xdim_tlast,
+  output wire             m_axis_xdim_tvalid,
+  input  wire             m_axis_xdim_tready,
+
+  // Y-dimension inter-XB connections
+  input  wire [WIDTH-1:0] s_axis_ydim_tdata,
+  input  wire [0:0]       s_axis_ydim_tdest,
+  input  wire             s_axis_ydim_tlast,
+  input  wire             s_axis_ydim_tvalid,
+  output wire             s_axis_ydim_tready,
+  output wire [WIDTH-1:0] m_axis_ydim_tdata,
+  output wire [0:0]       m_axis_ydim_tdest,
+  output wire             m_axis_ydim_tlast,
+  output wire             m_axis_ydim_tvalid,
+  input  wire             m_axis_ydim_tready
+);
+
+  // -------------------------------------------------
+  // Routing functions
+  // -------------------------------------------------
+  // Provides node_to_xdst() and node_to_ydst()
+  `include "mesh_node_mapping.vh"
+
+  // Route for packets arriving on the terminal port.
+  // Result is {VC, port}: port 2 = term out, 1 = xdim out, 0 = ydim out.
+  // Long journeys get VC = 1 to bypass local traffic.
+  function [2:0] term_in_route;
+    input [WIDTH:0] header;
+    reg [$clog2(DIM_SIZE)-1:0] dst_x, dst_y, delta_x, delta_y;
+    begin
+      dst_x   = node_to_xdst(header);
+      dst_y   = node_to_ydst(header);
+      delta_x = dst_x - XB_ADDR_X;
+      delta_y = dst_y - XB_ADDR_Y;
+      // The X coordinate is resolved first; Y is only considered once the
+      // packet is in the destination column
+      if (dst_x == XB_ADDR_X) begin
+        if (dst_y == XB_ADDR_Y)
+          term_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */};
+        else
+          term_in_route = {delta_y[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out */};
+      end else begin
+        term_in_route = {delta_x[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */};
+      end
+    end
+  endfunction
+
+  // Route for packets arriving on the X-dimension port.
+  // Result is {VC, port}: port 2 = term out, 1 = xdim out, 0 = ydim out.
+  // Long journeys get VC = 1 to bypass local traffic.
+  function [2:0] xdim_in_route;
+    input [WIDTH:0] header;
+    reg [$clog2(DIM_SIZE)-1:0] dst_x, dst_y, delta_x, delta_y;
+    begin
+      dst_x   = node_to_xdst(header);
+      dst_y   = node_to_ydst(header);
+      delta_x = dst_x - XB_ADDR_X;
+      delta_y = dst_y - XB_ADDR_Y;
+      // The X coordinate is resolved first; Y is only considered once the
+      // packet is in the destination column
+      if (dst_x == XB_ADDR_X) begin
+        if (dst_y == XB_ADDR_Y)
+          xdim_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */};
+        else
+          xdim_in_route = {delta_y[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out */};
+      end else begin
+        xdim_in_route = {delta_x[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */};
+      end
+    end
+  endfunction
+
+  // Route for packets arriving on the Y-dimension port; only the Y
+  // coordinate is examined here.
+  // Result is {VC, port}: port 1 = term out, 0 = ydim out.
+  // Long journeys get VC = 1 to bypass local traffic.
+  function [1:0] ydim_in_route;
+    input [WIDTH:0] header;
+    reg [$clog2(DIM_SIZE)-1:0] dst_y, delta_y;
+    begin
+      dst_y   = node_to_ydst(header);
+      delta_y = dst_y - XB_ADDR_Y;
+      if (dst_y == XB_ADDR_Y) begin
+        ydim_in_route = {1'b0 /* VC don't care */, 1'd1 /* term out */};
+      end else begin
+        ydim_in_route = {delta_y[$clog2(DIM_SIZE)-1], 1'd0 /* ydim out */};
+      end
+    end
+  endfunction
+
+  // -------------------------------------------------
+  // Input demuxes
+  // -------------------------------------------------
+  // Naming: <src>2<dst>_* is the stream from ingress <src> to egress <dst>
+  // (t = terminal, x = X-dim, y = Y-dim); *_tdest carries the VC bit.
+
+  // ---- Terminal ingress: packet gate -> 3-way demux ----
+  wire [WIDTH-1:0] ti_gt_tdata;
+  wire             ti_gt_tdest, ti_gt_tlast, ti_gt_tvalid, ti_gt_tready;
+  wire [WIDTH-1:0] t2t_tdata;
+  wire             t2t_tdest, t2t_tlast, t2t_tvalid, t2t_tready;
+  wire [WIDTH-1:0] t2x_tdata;
+  wire             t2x_tdest, t2x_tlast, t2x_tvalid, t2x_tready;
+  wire [WIDTH-1:0] t2y_tdata;
+  wire             t2y_tdest, t2y_tlast, t2y_tvalid, t2y_tready;
+  wire [WIDTH-1:0] term_in_hdr;
+  wire [1:0]       term_in_port;
+
+  // The header reported by the demux selects both the VC and the output
+  assign {ti_gt_tdest, term_in_port} = term_in_route(term_in_hdr);
+
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+  ) term_in_pkt_gate_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  (s_axis_term_tdata),
+    .i_tlast  (s_axis_term_tlast),
+    .i_tvalid (s_axis_term_tvalid),
+    .i_tready (s_axis_term_tready),
+    .i_terror (1'b0),
+    .o_tdata  (ti_gt_tdata),
+    .o_tlast  (ti_gt_tlast),
+    .o_tvalid (ti_gt_tvalid),
+    .o_tready (ti_gt_tready)
+  );
+
+  // Output index 2 = t2t, 1 = t2x, 0 = t2y (matches term_in_route)
+  axi_demux #(
+    .WIDTH(WIDTH+1), .SIZE(3),
+    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
+  ) term_in_demux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .header   (term_in_hdr),
+    .dest     (term_in_port),
+    .i_tdata  ({ti_gt_tdest, ti_gt_tdata}),
+    .i_tlast  (ti_gt_tlast),
+    .i_tvalid (ti_gt_tvalid),
+    .i_tready (ti_gt_tready),
+    .o_tdata  ({t2t_tdest, t2t_tdata, t2x_tdest, t2x_tdata, t2y_tdest, t2y_tdata}),
+    .o_tlast  ({t2t_tlast, t2x_tlast, t2y_tlast}),
+    .o_tvalid ({t2t_tvalid, t2x_tvalid, t2y_tvalid}),
+    .o_tready ({t2t_tready, t2x_tready, t2y_tready})
+  );
+
+  // ---- X-dim ingress: VC buffer -> 3-way demux ----
+  wire [WIDTH-1:0] xi_gt_tdata;
+  wire             xi_gt_tdest, xi_gt_tlast, xi_gt_tvalid, xi_gt_tready;
+  wire [WIDTH-1:0] x2t_tdata;
+  wire             x2t_tdest, x2t_tlast, x2t_tvalid, x2t_tready;
+  wire [WIDTH-1:0] x2x_tdata;
+  wire             x2x_tdest, x2x_tlast, x2x_tvalid, x2x_tready;
+  wire [WIDTH-1:0] x2y_tdata;
+  wire             x2y_tdest, x2y_tlast, x2y_tvalid, x2y_tready;
+  wire [WIDTH-1:0] xdim_in_hdr;
+  wire [1:0]       xdim_in_port;
+
+  assign {xi_gt_tdest, xdim_in_port} = xdim_in_route(xdim_in_hdr);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) xdim_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_xdim_tdata),
+    .s_axis_tdest  (s_axis_xdim_tdest),
+    .s_axis_tlast  (s_axis_xdim_tlast),
+    .s_axis_tvalid (s_axis_xdim_tvalid),
+    .s_axis_tready (s_axis_xdim_tready),
+    .m_axis_tdata  (xi_gt_tdata),
+    .m_axis_tlast  (xi_gt_tlast),
+    .m_axis_tvalid (xi_gt_tvalid),
+    .m_axis_tready (xi_gt_tready)
+  );
+
+  // Output index 2 = x2t, 1 = x2x, 0 = x2y (matches xdim_in_route)
+  axi_demux #(
+    .WIDTH(WIDTH+1), .SIZE(3),
+    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
+  ) xdim_in_demux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .header   (xdim_in_hdr),
+    .dest     (xdim_in_port),
+    .i_tdata  ({xi_gt_tdest, xi_gt_tdata}),
+    .i_tlast  (xi_gt_tlast),
+    .i_tvalid (xi_gt_tvalid),
+    .i_tready (xi_gt_tready),
+    .o_tdata  ({x2t_tdest, x2t_tdata, x2x_tdest, x2x_tdata, x2y_tdest, x2y_tdata}),
+    .o_tlast  ({x2t_tlast, x2x_tlast, x2y_tlast}),
+    .o_tvalid ({x2t_tvalid, x2x_tvalid, x2y_tvalid}),
+    .o_tready ({x2t_tready, x2x_tready, x2y_tready})
+  );
+
+  // ---- Y-dim ingress: VC buffer -> 2-way demux ----
+  wire [WIDTH-1:0] yi_gt_tdata;
+  wire             yi_gt_tdest, yi_gt_tlast, yi_gt_tvalid, yi_gt_tready;
+  wire [WIDTH-1:0] y2t_tdata;
+  wire             y2t_tdest, y2t_tlast, y2t_tvalid, y2t_tready;
+  wire [WIDTH-1:0] y2y_tdata;
+  wire             y2y_tdest, y2y_tlast, y2y_tvalid, y2y_tready;
+  wire [WIDTH-1:0] ydim_in_hdr;
+  wire [0:0]       ydim_in_port;
+
+  assign {yi_gt_tdest, ydim_in_port} = ydim_in_route(ydim_in_hdr);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) ydim_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_ydim_tdata),
+    .s_axis_tdest  (s_axis_ydim_tdest),
+    .s_axis_tlast  (s_axis_ydim_tlast),
+    .s_axis_tvalid (s_axis_ydim_tvalid),
+    .s_axis_tready (s_axis_ydim_tready),
+    .m_axis_tdata  (yi_gt_tdata),
+    .m_axis_tlast  (yi_gt_tlast),
+    .m_axis_tvalid (yi_gt_tvalid),
+    .m_axis_tready (yi_gt_tready)
+  );
+
+  // Output index 1 = y2t, 0 = y2y (matches ydim_in_route)
+  axi_demux #(
+    .WIDTH(WIDTH+1), .SIZE(2),
+    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
+  ) ydim_in_demux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .header   (ydim_in_hdr),
+    .dest     (ydim_in_port),
+    .i_tdata  ({yi_gt_tdest, yi_gt_tdata}),
+    .i_tlast  (yi_gt_tlast),
+    .i_tvalid (yi_gt_tvalid),
+    .i_tready (yi_gt_tready),
+    .o_tdata  ({y2t_tdest, y2t_tdata, y2y_tdest, y2y_tdata}),
+    .o_tlast  ({y2t_tlast, y2y_tlast}),
+    .o_tvalid ({y2t_tvalid, y2y_tvalid}),
+    .o_tready ({y2t_tready, y2y_tready})
+  );
+
+  // -------------------------------------------------
+  // Output muxes
+  // -------------------------------------------------
+  // Terminal egress merges all three ingress ports; the VC bit is not
+  // forwarded to the terminal
+  wire term_tdest_discard;
+  axi_mux #(
+    .WIDTH(WIDTH+1), .SIZE(3),
+    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+  ) term_out_mux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({t2t_tdest, t2t_tdata, x2t_tdest, x2t_tdata, y2t_tdest, y2t_tdata}),
+    .i_tlast  ({t2t_tlast, x2t_tlast, y2t_tlast}),
+    .i_tvalid ({t2t_tvalid, x2t_tvalid, y2t_tvalid}),
+    .i_tready ({t2t_tready, x2t_tready, y2t_tready}),
+    .o_tdata  ({term_tdest_discard, m_axis_term_tdata}),
+    .o_tlast  (m_axis_term_tlast),
+    .o_tvalid (m_axis_term_tvalid),
+    .o_tready (m_axis_term_tready)
+  );
+
+  // X-dim egress only has terminal and X-dim sources (the Y-dim ingress
+  // demux has no X output)
+  axi_mux #(
+    .WIDTH(WIDTH+1), .SIZE(2),
+    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+  ) xdim_out_mux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({t2x_tdest, t2x_tdata, x2x_tdest, x2x_tdata}),
+    .i_tlast  ({t2x_tlast, x2x_tlast}),
+    .i_tvalid ({t2x_tvalid, x2x_tvalid}),
+    .i_tready ({t2x_tready, x2x_tready}),
+    .o_tdata  ({m_axis_xdim_tdest, m_axis_xdim_tdata}),
+    .o_tlast  (m_axis_xdim_tlast),
+    .o_tvalid (m_axis_xdim_tvalid),
+    .o_tready (m_axis_xdim_tready)
+  );
+
+  // Y-dim egress merges all three ingress ports
+  axi_mux #(
+    .WIDTH(WIDTH+1), .SIZE(3),
+    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
+  ) ydim_out_mux_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({t2y_tdest, t2y_tdata, x2y_tdest, x2y_tdata, y2y_tdest, y2y_tdata}),
+    .i_tlast  ({t2y_tlast, x2y_tlast, y2y_tlast}),
+    .i_tvalid ({t2y_tvalid, x2y_tvalid, y2y_tvalid}),
+    .i_tready ({t2y_tready, x2y_tready, y2y_tready}),
+    .o_tdata  ({m_axis_ydim_tdest, m_axis_ydim_tdata}),
+    .o_tlast  (m_axis_ydim_tlast),
+    .o_tvalid (m_axis_ydim_tvalid),
+    .o_tready (m_axis_ydim_tready)
+  );
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v
new file mode 100644
index 000000000..21a66782d
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v
@@ -0,0 +1,294 @@
+//
+// Copyright 2018 Ettus Research, A National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: torus_2d_dor_router_single_sw
+// Description:
+// This module implements the router for a 2-dimensional (2d)
+// torus network that uses dimension order routing (dor) and has a
+// single underlying switch (single_sw). It uses AXI-Stream for all of its
+// links.
+// The torus topology, routing algorithms and the router architecture are
+// described in README.pdf in this directory.
+// Parameters:
+// - WIDTH: Width of the AXI-Stream data bus
+// - DIM_SIZE: Number of routers along one dimension
+// - XB_ADDR_X: The X-coordinate of this router in the topology
+// - XB_ADDR_Y: The Y-coordinate of this router in the topology
+// - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words)
+// - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words)
+// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers.
+// * WORMHOLE: Allocate route as soon as first word in pkt arrives
+// * CUT-THROUGH: Allocate route only after the full pkt arrives
+// - SWITCH_ALLOC: Algorithm to allocate the switch
+// * PRIO: Priority based. Priority: Y-dim > X-dim > Term
+// * ROUND-ROBIN: Round robin input port allocation
+// Signals:
+// - *_axis_term_*: Terminal ports (master/slave)
+// - *_axis_xdim_*: Inter-router X-dim connections (master/slave)
+// - *_axis_ydim_*: Inter-router Y-dim connections (master/slave)
+//
+
+module torus_2d_dor_router_single_sw #(
+  parameter WIDTH = 64,
+  parameter DIM_SIZE = 4,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0,
+  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0,
+  parameter TERM_BUFF_SIZE = 5,
+  parameter XB_BUFF_SIZE = 5,
+  parameter ROUTING_ALLOC = "WORMHOLE",
+  parameter SWITCH_ALLOC = "PRIO"
+) (
+  // Clocks and resets
+  input  wire             clk,
+  input  wire             reset,
+
+  // Terminal connections
+  input  wire [WIDTH-1:0] s_axis_term_tdata,
+  input  wire             s_axis_term_tlast,
+  input  wire             s_axis_term_tvalid,
+  output wire             s_axis_term_tready,
+  output wire [WIDTH-1:0] m_axis_term_tdata,
+  output wire             m_axis_term_tlast,
+  output wire             m_axis_term_tvalid,
+  input  wire             m_axis_term_tready,
+
+  // X-dimension inter-XB connections (tdest carries the virtual channel)
+  input  wire [WIDTH-1:0] s_axis_xdim_tdata,
+  input  wire [0:0]       s_axis_xdim_tdest,
+  input  wire             s_axis_xdim_tlast,
+  input  wire             s_axis_xdim_tvalid,
+  output wire             s_axis_xdim_tready,
+  output wire [WIDTH-1:0] m_axis_xdim_tdata,
+  output wire [0:0]       m_axis_xdim_tdest,
+  output wire             m_axis_xdim_tlast,
+  output wire             m_axis_xdim_tvalid,
+  input  wire             m_axis_xdim_tready,
+
+  // Y-dimension inter-XB connections (tdest carries the virtual channel)
+  input  wire [WIDTH-1:0] s_axis_ydim_tdata,
+  input  wire [0:0]       s_axis_ydim_tdest,
+  input  wire             s_axis_ydim_tlast,
+  input  wire             s_axis_ydim_tvalid,
+  output wire             s_axis_ydim_tready,
+  output wire [WIDTH-1:0] m_axis_ydim_tdata,
+  output wire [0:0]       m_axis_ydim_tdest,
+  output wire             m_axis_ydim_tlast,
+  output wire             m_axis_ydim_tvalid,
+  input  wire             m_axis_ydim_tready
+);
+
+  //-------------------------------------------------
+  // Routing and switch allocation functions
+  //-------------------------------------------------
+
+  // mesh_node_mapping.vh file contains the mapping between the node number
+  // and its XY coordinates. It is autogenerated and defines the node_to_xdst
+  // and node_to_ydst functions.
+  `include "mesh_node_mapping.vh"
+
+  localparam [1:0] SW_DEST_TERM = 2'd0;  // Switch port 0: terminal
+  localparam [1:0] SW_DEST_XDIM = 2'd1;  // Switch port 1: X-dimension link
+  localparam [1:0] SW_DEST_YDIM = 2'd2;  // Switch port 2: Y-dimension link
+  localparam [1:0] SW_NUM_DESTS = 2'd3;  // Number of switch ports (round-robin modulus)
+
+  // The compute_switch_tdest function is the destination selector
+  // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the
+  // first word of a packet and determine the destination of the packet.
+  function [2:0] compute_switch_tdest;
+    input [WIDTH-1:0] header;
+    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
+    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;  // One extra bit to hold the sign
+    begin
+      xdst = node_to_xdst(header);
+      ydst = node_to_ydst(header);
+      xdiff = xdst - XB_ADDR_X;
+      ydiff = ydst - XB_ADDR_Y;
+      // Routing logic
+      // - MSB is the VC, 2 LSBs are the router destination
+      // - The X offset is resolved before the Y offset (dimension order)
+      // - A negative offset selects VC = 1 (long journeys bypass local traffic)
+      if (xdiff == 'd0 && ydiff == 'd0) begin
+        compute_switch_tdest = {1'b0 /* VC don't care */, SW_DEST_TERM};
+      end else if (xdiff != 'd0) begin
+        compute_switch_tdest = {(xdiff < 0), SW_DEST_XDIM};
+      end else begin
+        compute_switch_tdest = {(ydiff < 0), SW_DEST_YDIM};
+      end
+    end
+  endfunction
+
+  // The compute_switch_alloc function is the switch allocation function
+  // i.e. it chooses which input port reserves the switch for packet transfer.
+  // After the switch is allocated, all other ports will be backpressured until
+  // the packet finishes transferring.
+  function [1:0] compute_switch_alloc;
+    input [2:0] pkt_waiting;  // One bit per input port, indexed by SW_DEST_*
+    input [1:0] last_alloc;   // Port that was granted the previous packet
+    begin
+      if (pkt_waiting == 3'b000 || pkt_waiting == 3'b001) begin
+        // Nothing is waiting (park on the terminal) or only the terminal is
+        compute_switch_alloc = SW_DEST_TERM;
+      end else if (pkt_waiting == 3'b010) begin
+        compute_switch_alloc = SW_DEST_XDIM;
+      end else if (pkt_waiting == 3'b100) begin
+        compute_switch_alloc = SW_DEST_YDIM;
+      end else begin
+        // At least two ports are waiting, so arbitrate between them
+        if (SWITCH_ALLOC == "PRIO") begin
+          // Priority: Y-dim > X-dim > Term
+          if (pkt_waiting[SW_DEST_YDIM])
+            compute_switch_alloc = SW_DEST_YDIM;
+          else if (pkt_waiting[SW_DEST_XDIM])
+            compute_switch_alloc = SW_DEST_XDIM;
+          else
+            compute_switch_alloc = SW_DEST_TERM;
+        end else begin
+          // Round-robin
+          if (pkt_waiting[(last_alloc + 3'd1) % SW_NUM_DESTS])
+            compute_switch_alloc = (last_alloc + 3'd1) % SW_NUM_DESTS;
+          else if (pkt_waiting[(last_alloc + 3'd2) % SW_NUM_DESTS])
+            compute_switch_alloc = (last_alloc + 3'd2) % SW_NUM_DESTS;
+          else
+            compute_switch_alloc = last_alloc;
+        end
+      end
+    end
+  endfunction
+
+  //-------------------------------------------------
+  // Ingress buffers
+  //-------------------------------------------------
+  wire [WIDTH-1:0] ydim_in_data , xdim_in_data , term_in_data ;
+  wire [2:0]       ydim_in_dest , xdim_in_dest , term_in_dest ;  // {VC, switch output port}
+  wire             ydim_in_last , xdim_in_last , term_in_last ;
+  wire             ydim_in_valid, xdim_in_valid, term_in_valid;
+  wire             ydim_in_ready, xdim_in_ready, term_in_ready;
+
+  // Data coming in from the terminal is gated until a full packet arrives
+  // in order to minimize the switch allocation time per packet.
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+  ) term_in_pkt_gate_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  (s_axis_term_tdata),
+    .i_tlast  (s_axis_term_tlast),
+    .i_tvalid (s_axis_term_tvalid),
+    .i_tready (s_axis_term_tready),
+    .i_terror (1'b0),
+    .o_tdata  (term_in_data),
+    .o_tlast  (term_in_last),
+    .o_tvalid (term_in_valid),
+    .o_tready (term_in_ready)
+  );
+  assign term_in_dest = compute_switch_tdest(term_in_data);  // NOTE(review): evaluated on every word; presumably only the header word is used by axis_switch - confirm
+
+  // The XY directions have buffers with 2 virtual channels to minimize the
+  // possibility of a deadlock.
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) xdim_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_xdim_tdata),
+    .s_axis_tdest  (s_axis_xdim_tdest),
+    .s_axis_tlast  (s_axis_xdim_tlast),
+    .s_axis_tvalid (s_axis_xdim_tvalid),
+    .s_axis_tready (s_axis_xdim_tready),
+    .m_axis_tdata  (xdim_in_data),
+    .m_axis_tlast  (xdim_in_last),
+    .m_axis_tvalid (xdim_in_valid),
+    .m_axis_tready (xdim_in_ready)
+  );
+  assign xdim_in_dest = compute_switch_tdest(xdim_in_data);  // Follows the word at the buffer output
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) ydim_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_ydim_tdata ),
+    .s_axis_tdest  (s_axis_ydim_tdest ),
+    .s_axis_tlast  (s_axis_ydim_tlast ),
+    .s_axis_tvalid (s_axis_ydim_tvalid),
+    .s_axis_tready (s_axis_ydim_tready),
+    .m_axis_tdata  (ydim_in_data ),
+    .m_axis_tlast  (ydim_in_last ),
+    .m_axis_tvalid (ydim_in_valid),
+    .m_axis_tready (ydim_in_ready)
+  );
+  assign ydim_in_dest = compute_switch_tdest(ydim_in_data);  // Follows the word at the buffer output
+
+  //-------------------------------------------------
+  // Switch
+  //-------------------------------------------------
+
+  // Track the input packet state
+  localparam [0:0] PKT_ST_HEAD = 1'b0;  // Next transferred word is the first word of a packet
+  localparam [0:0] PKT_ST_BODY = 1'b1;  // A packet is in flight through the switch
+  reg [0:0] pkt_state = PKT_ST_HEAD;
+
+  // The switch requires the allocation to stay valid until the
+  // end of the packet. We also might need to keep the previous
+  // packet's allocation to compute the current one
+  wire [1:0] switch_alloc;
+  reg  [1:0] prev_switch_alloc = SW_DEST_TERM;
+  reg  [1:0] pkt_switch_alloc  = SW_DEST_TERM;
+
+  // The switch only accepts packets on a single port at a time, so the
+  // packet state must follow the handshake of the port that owns the switch.
+  // Looking at tvalid/tlast of the other ports as well would let a packet
+  // that is merely waiting on one of them terminate the packet in flight.
+  wire [2:0] sw_in_valid_vec = {ydim_in_valid, xdim_in_valid, term_in_valid};
+  wire [2:0] sw_in_ready_vec = {ydim_in_ready, xdim_in_ready, term_in_ready};
+  wire [2:0] sw_in_last_vec  = {ydim_in_last , xdim_in_last , term_in_last };
+  wire sw_in_valid = |sw_in_valid_vec;  // Some port has a word waiting
+  wire sw_in_xfer  = sw_in_valid_vec[switch_alloc] & sw_in_ready_vec[switch_alloc];
+  wire sw_in_last  = sw_in_last_vec[switch_alloc];  // Only meaningful together with sw_in_xfer
+
+  always @(posedge clk) begin
+    if (reset) begin
+      pkt_state         <= PKT_ST_HEAD;
+      prev_switch_alloc <= SW_DEST_TERM;
+      pkt_switch_alloc  <= SW_DEST_TERM;
+    end else if (sw_in_xfer) begin
+      pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
+      if (pkt_state == PKT_ST_HEAD)
+        pkt_switch_alloc <= switch_alloc;   // Hold the grant for the rest of the packet
+      if (sw_in_last)
+        prev_switch_alloc <= switch_alloc;  // Remember the grant for round-robin
+    end
+  end
+
+  assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
+    compute_switch_alloc(sw_in_valid_vec, prev_switch_alloc) :
+    pkt_switch_alloc;
+
+  wire term_tdest_discard;  // VC bit of the terminal output; the terminal port has no tdest
+  axis_switch #(
+    .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(3), .OUT_PORTS(3)
+  ) switch_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  ({ydim_in_data , xdim_in_data , term_in_data }),
+    .s_axis_tdest  ({ydim_in_dest , xdim_in_dest , term_in_dest }),
+    .s_axis_tlast  ({ydim_in_last , xdim_in_last , term_in_last }),
+    .s_axis_tvalid ({ydim_in_valid, xdim_in_valid, term_in_valid}),
+    .s_axis_tready ({ydim_in_ready, xdim_in_ready, term_in_ready}),
+    .s_axis_alloc  (switch_alloc),  // Input port that currently owns the switch
+    .m_axis_tdata  ({m_axis_ydim_tdata,  m_axis_xdim_tdata,  m_axis_term_tdata }),
+    .m_axis_tdest  ({m_axis_ydim_tdest,  m_axis_xdim_tdest,  term_tdest_discard}),
+    .m_axis_tlast  ({m_axis_ydim_tlast,  m_axis_xdim_tlast,  m_axis_term_tlast }),
+    .m_axis_tvalid ({m_axis_ydim_tvalid, m_axis_xdim_tvalid, m_axis_term_tvalid}),
+    .m_axis_tready ({m_axis_ydim_tready, m_axis_xdim_tready, m_axis_term_tready})
+  );
+
+endmodule
+