diff options
Diffstat (limited to 'fpga/usrp3/lib/rfnoc/crossbar')
33 files changed, 5061 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs new file mode 100644 index 000000000..6fa49cd04 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/Makefile.srcs @@ -0,0 +1,25 @@ +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +################################################## +# Crossbar Sources +################################################## +RFNOC_XBAR_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/rfnoc/crossbar/, \ +axis_ctrl_crossbar_2d_mesh.v \ +axis_ctrl_crossbar_nxn.v \ +torus_2d_dor_router_single_sw.v \ +mesh_2d_dor_router_single_sw.v \ +axis_ingress_vc_buff.v \ +axis_switch.v \ +axis_port_terminator.v \ +chdr_crossbar_nxn.v \ +chdr_xb_ingress_buff.v \ +chdr_xb_routing_table.v \ +)) + +# Unused sources +# torus_2d_dor_router_multi_sw.v \ +# mesh_2d_dor_router_multi_sw.v \ diff --git a/fpga/usrp3/lib/rfnoc/crossbar/README.pdf b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf Binary files differnew file mode 100644 index 000000000..838702bd1 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/README.pdf diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v new file mode 100644 index 000000000..e69bdfe3c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_2d_mesh.v @@ -0,0 +1,288 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_2d_mesh +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. 
+// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers along one dimension (# Nodes = DIM_SIZE * DIM_SIZE) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_2d_mesh #( + parameter DIM_SIZE = 4, + parameter WIDTH = 64, + parameter TOPOLOGY = "MESH", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] s_axis_tdata, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tlast, + input wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tvalid, + output wire [DIM_SIZE*DIM_SIZE-1:0] s_axis_tready, + // Output + output wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] m_axis_tdata, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tlast, + output wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tvalid, + input wire [DIM_SIZE*DIM_SIZE-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + + `include "mesh_node_mapping.vh" + + //------------------------------------------------------- + // Unflatten input and output ports + //------------------------------------------------------- + 
wire [WIDTH-1:0] i_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire i_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] o_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire o_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire clear_routers = deadlock_detected; + + genvar p,x,y; + generate + for (p = 0; p < DIM_SIZE*DIM_SIZE; p=p+1) begin + assign i_tdata_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tdata[p*WIDTH +: WIDTH]; + assign i_tlast_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tlast[p]; + assign i_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] = s_axis_tvalid[p]; + assign s_axis_tready[p] = i_tready_arr[node_to_ydst(p)][node_to_xdst(p)] | clear_routers; + + assign m_axis_tdata[p*WIDTH +: WIDTH] = o_tdata_arr[node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tlast[p] = o_tlast_arr [node_to_ydst(p)][node_to_xdst(p)]; + assign m_axis_tvalid[p] = o_tvalid_arr[node_to_ydst(p)][node_to_xdst(p)] & ~clear_routers; + assign o_tready_arr[node_to_ydst(p)][node_to_xdst(p)] = m_axis_tready[p]; + end + endgenerate + + //------------------------------------------------------- + // Instantiate routers + //------------------------------------------------------- + + wire [WIDTH-1:0] e2w_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire e2w_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] w2e_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire w2e_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] n2s_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tdest_arr 
[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire n2s_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + wire [WIDTH-1:0] s2n_tdata_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tdest_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tlast_arr [0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tvalid_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + wire s2n_tready_arr[0:DIM_SIZE-1][0:DIM_SIZE-1]; + + localparam N = DIM_SIZE; + localparam NEND = DIM_SIZE - 1; + localparam [WIDTH-1:0] ZERO = {WIDTH{1'b0}}; + + generate + for (y = 0; y < DIM_SIZE; y=y+1) begin: ydim + for (x = 0; x < DIM_SIZE; x=x+1) begin: xdim + if (TOPOLOGY == "MESH") begin + mesh_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_ter_tdata (i_tdata_arr [y][x]), + .s_axis_ter_tlast (i_tlast_arr [y][x]), + .s_axis_ter_tvalid (i_tvalid_arr[y][x]), + .s_axis_ter_tready (i_tready_arr[y][x]), + .m_axis_ter_tdata (o_tdata_arr [y][x]), + .m_axis_ter_tlast (o_tlast_arr [y][x]), + .m_axis_ter_tvalid (o_tvalid_arr[y][x]), + .m_axis_ter_tready (o_tready_arr[y][x]), + // West connections + .s_axis_wst_tdata ((x != 0) ? e2w_tdata_arr [y][x] : ZERO), + .s_axis_wst_tdest ((x != 0) ? e2w_tdest_arr [y][x] : 1'b0), + .s_axis_wst_tlast ((x != 0) ? e2w_tlast_arr [y][x] : 1'b0), + .s_axis_wst_tvalid ((x != 0) ? e2w_tvalid_arr[y][x] : 1'b0), + .s_axis_wst_tready ( e2w_tready_arr[y][x] ), + .m_axis_wst_tdata ( w2e_tdata_arr [y][(x+N-1)%N] ), + .m_axis_wst_tdest ( w2e_tdest_arr [y][(x+N-1)%N] ), + .m_axis_wst_tlast ( w2e_tlast_arr [y][(x+N-1)%N] ), + .m_axis_wst_tvalid ( w2e_tvalid_arr[y][(x+N-1)%N] ), + .m_axis_wst_tready ((x != 0) ? 
w2e_tready_arr[y][(x+N-1)%N] : 1'b1), + // East connections + .s_axis_est_tdata ((x != NEND) ? w2e_tdata_arr [y][x] : ZERO), + .s_axis_est_tdest ((x != NEND) ? w2e_tdest_arr [y][x] : 1'b0), + .s_axis_est_tlast ((x != NEND) ? w2e_tlast_arr [y][x] : 1'b0), + .s_axis_est_tvalid ((x != NEND) ? w2e_tvalid_arr[y][x] : 1'b0), + .s_axis_est_tready ( w2e_tready_arr[y][x] ), + .m_axis_est_tdata ( e2w_tdata_arr [y][(x+1)%N] ), + .m_axis_est_tdest ( e2w_tdest_arr [y][(x+1)%N] ), + .m_axis_est_tlast ( e2w_tlast_arr [y][(x+1)%N] ), + .m_axis_est_tvalid ( e2w_tvalid_arr[y][(x+1)%N] ), + .m_axis_est_tready ((x != NEND) ? e2w_tready_arr[y][(x+1)%N] : 1'b1), + // North connections + .s_axis_nor_tdata ((y != 0) ? s2n_tdata_arr [y][x] : ZERO), + .s_axis_nor_tdest ((y != 0) ? s2n_tdest_arr [y][x] : 1'b0), + .s_axis_nor_tlast ((y != 0) ? s2n_tlast_arr [y][x] : 1'b0), + .s_axis_nor_tvalid ((y != 0) ? s2n_tvalid_arr[y][x] : 1'b0), + .s_axis_nor_tready ( s2n_tready_arr[y][x] ), + .m_axis_nor_tdata ( n2s_tdata_arr [(y+N-1)%N][x] ), + .m_axis_nor_tdest ( n2s_tdest_arr [(y+N-1)%N][x] ), + .m_axis_nor_tlast ( n2s_tlast_arr [(y+N-1)%N][x] ), + .m_axis_nor_tvalid ( n2s_tvalid_arr[(y+N-1)%N][x] ), + .m_axis_nor_tready ((y != 0) ? n2s_tready_arr[(y+N-1)%N][x] : 1'b1), + // South connections + .s_axis_sou_tdata ((y != NEND) ? n2s_tdata_arr [y][x] : ZERO), + .s_axis_sou_tdest ((y != NEND) ? n2s_tdest_arr [y][x] : 1'b0), + .s_axis_sou_tlast ((y != NEND) ? n2s_tlast_arr [y][x] : 1'b0), + .s_axis_sou_tvalid ((y != NEND) ? n2s_tvalid_arr[y][x] : 1'b0), + .s_axis_sou_tready ( n2s_tready_arr[y][x] ), + .m_axis_sou_tdata ( s2n_tdata_arr [(y+1)%N][x] ), + .m_axis_sou_tdest ( s2n_tdest_arr [(y+1)%N][x] ), + .m_axis_sou_tlast ( s2n_tlast_arr [(y+1)%N][x] ), + .m_axis_sou_tvalid ( s2n_tvalid_arr[(y+1)%N][x] ), + .m_axis_sou_tready ((y != NEND) ? 
s2n_tready_arr[(y+1)%N][x] : 1'b1) + ); + end else begin + torus_2d_dor_router_single_sw #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .XB_ADDR_X (x), + .XB_ADDR_Y (y), + .TERM_BUFF_SIZE (INGRESS_BUFF_SIZE), + .XB_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC) + ) rtr_i ( + // Clock and reset + .clk (clk), + .reset (reset | clear_routers), + // Terminals + .s_axis_term_tdata (i_tdata_arr [y][x]), + .s_axis_term_tlast (i_tlast_arr [y][x]), + .s_axis_term_tvalid (i_tvalid_arr[y][x]), + .s_axis_term_tready (i_tready_arr[y][x]), + .m_axis_term_tdata (o_tdata_arr [y][x]), + .m_axis_term_tlast (o_tlast_arr [y][x]), + .m_axis_term_tvalid (o_tvalid_arr[y][x]), + .m_axis_term_tready (o_tready_arr[y][x]), + // X-dim connections + .s_axis_xdim_tdata (e2w_tdata_arr [y][x] ), + .s_axis_xdim_tdest (e2w_tdest_arr [y][x] ), + .s_axis_xdim_tlast (e2w_tlast_arr [y][x] ), + .s_axis_xdim_tvalid (e2w_tvalid_arr[y][x] ), + .s_axis_xdim_tready (e2w_tready_arr[y][x] ), + .m_axis_xdim_tdata (e2w_tdata_arr [y][(x+1)%N]), + .m_axis_xdim_tdest (e2w_tdest_arr [y][(x+1)%N]), + .m_axis_xdim_tlast (e2w_tlast_arr [y][(x+1)%N]), + .m_axis_xdim_tvalid (e2w_tvalid_arr[y][(x+1)%N]), + .m_axis_xdim_tready (e2w_tready_arr[y][(x+1)%N]), + // Y-dim connections + .s_axis_ydim_tdata (s2n_tdata_arr [y][x] ), + .s_axis_ydim_tdest (s2n_tdest_arr [y][x] ), + .s_axis_ydim_tlast (s2n_tlast_arr [y][x] ), + .s_axis_ydim_tvalid (s2n_tvalid_arr[y][x] ), + .s_axis_ydim_tready (s2n_tready_arr[y][x] ), + .m_axis_ydim_tdata (s2n_tdata_arr [(y+1)%N][x]), + .m_axis_ydim_tdest (s2n_tdest_arr [(y+1)%N][x]), + .m_axis_ydim_tlast (s2n_tlast_arr [(y+1)%N][x]), + .m_axis_ydim_tvalid (s2n_tvalid_arr[(y+1)%N][x]), + .m_axis_ydim_tready (s2n_tready_arr[(y+1)%N][x]) + ); + end + end + end + endgenerate + + //------------------------------------------------------- + // Deadlock detector + //------------------------------------------------------- + // A deadlock is defined on an AXIS 
bus as an extended period + // where tvalid=1 but tready=0. If at least one slave port is in + // this state and none of the master ports are then this router + // will go into a failsafe deadlock recovery mode. The DEADLOCK_TIMEOUT + // parameter defines the duration for which this condition has + // to be true. In deadlock recovery mode, all routers are held in reset + // (thus losing all packets in flight) and all input ports are flushed. + + wire m_locked = |(m_axis_tvalid & ~m_axis_tready); + wire s_locked = |(s_axis_tvalid & ~s_axis_tready); + + // A counter that tracks the duration for which the router is livelocked + // If the livelock duration is higher than DEADLOCK_TIMEOUT then it is a + // deadlock + reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_counter = DEADLOCK_TIMEOUT-1; + always @(posedge clk) begin + if (reset | ~(s_locked & ~m_locked)) begin + deadlock_counter <= DEADLOCK_TIMEOUT-1; + end else if (deadlock_counter != 'd0) begin + deadlock_counter <= deadlock_counter - 1; + end + end + + // A counter that tracks the deadlock recovery period. If the slave ports + // have no activity for DEADLOCK_TIMEOUT cycles then the router can + // successfully come out of the deadlocked state. 
+ reg [$clog2(DEADLOCK_TIMEOUT)-1:0] deadlock_recover_counter = 'd0; + always @(posedge clk) begin + if (reset) begin + deadlock_recover_counter <= 'd0; + end else if (deadlock_detected) begin + if (|s_axis_tvalid) + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + else + deadlock_recover_counter <= deadlock_recover_counter - 1; + end else if (deadlock_counter == 'd0) begin + deadlock_recover_counter <= DEADLOCK_TIMEOUT-1; + end + end + assign deadlock_detected = (deadlock_recover_counter != 0); + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v new file mode 100644 index 000000000..6de082b4c --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ctrl_crossbar_nxn.v @@ -0,0 +1,130 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ctrl_crossbar_nxn +// Description: +// This module implements a 2-dimensional (2d) mesh network (mesh) crossbar +// for AXIS-CTRL traffic. Supports mesh and torus topologies. +// It uses AXI-Stream for all of its links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - NPORTS: Number of ports (maximum 1024) +// - TOPOLOGY: Is this a mesh (MESH) or a torus (TORUS) topology +// - INGRESS_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - ROUTER_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. 
Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// - DEADLOCK_TIMEOUT: Number of cycles to wait until a deadlock is detected +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// + +module axis_ctrl_crossbar_nxn #( + parameter WIDTH = 32, + parameter NPORTS = 10, + parameter TOPOLOGY = "TORUS", + parameter INGRESS_BUFF_SIZE = 5, + parameter ROUTER_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO", + parameter DEADLOCK_TIMEOUT = 16384 +) ( + input wire clk, + input wire reset, + // Inputs + input wire [(NPORTS*WIDTH)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(NPORTS*WIDTH)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Deadlock alert + output wire deadlock_detected +); + + function integer csqrt_max1024; + input integer value; + integer i; + begin + csqrt_max1024 = 1; + for (i = 1; i <= 32; i = i + 1) // sqrt(1024) = 32 + csqrt_max1024 = csqrt_max1024 + (i*i < value ? 1 : 0); + end + endfunction + + localparam integer DIM_SIZE = csqrt_max1024(NPORTS); + + wire [(DIM_SIZE*DIM_SIZE*WIDTH)-1:0] i_tdata, o_tdata ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tlast, o_tlast ; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tvalid, o_tvalid; + wire [DIM_SIZE*DIM_SIZE-1:0] i_tready, o_tready; + + // axis_ctrl_crossbar_2d_mesh needs to scale up in squares + // i.e. 4, 9, 16, 25, ... but NPORTS can be any number, so + // instantiate the next highest square number of ports and + // terminate the rest. 
+ axis_ctrl_crossbar_2d_mesh #( + .WIDTH (WIDTH), + .DIM_SIZE (DIM_SIZE), + .TOPOLOGY (TOPOLOGY), + .INGRESS_BUFF_SIZE(INGRESS_BUFF_SIZE), + .ROUTER_BUFF_SIZE (ROUTER_BUFF_SIZE), + .ROUTING_ALLOC (ROUTING_ALLOC), + .SWITCH_ALLOC (SWITCH_ALLOC), + .DEADLOCK_TIMEOUT (DEADLOCK_TIMEOUT) + ) router_dut_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (i_tdata), + .s_axis_tlast (i_tlast), + .s_axis_tvalid (i_tvalid), + .s_axis_tready (i_tready), + .m_axis_tdata (o_tdata), + .m_axis_tlast (o_tlast), + .m_axis_tvalid (o_tvalid), + .m_axis_tready (o_tready), + .deadlock_detected(deadlock_detected) + ); + + // Connect the bottom NPORTS to the IO + assign i_tdata[(NPORTS*WIDTH)-1:0] = s_axis_tdata; + assign i_tlast[NPORTS-1:0] = s_axis_tlast; + assign i_tvalid[NPORTS-1:0] = s_axis_tvalid; + assign s_axis_tready = i_tready[NPORTS-1:0]; + + assign m_axis_tdata = o_tdata[(NPORTS*WIDTH)-1:0]; + assign m_axis_tlast = o_tlast[NPORTS-1:0]; + assign m_axis_tvalid = o_tvalid[NPORTS-1:0]; + assign o_tready[NPORTS-1:0] = m_axis_tready; + + // Terminate the rest + genvar i; + generate for (i = NPORTS; i < (DIM_SIZE*DIM_SIZE); i = i + 1) begin: ports + axis_port_terminator #(.DATA_W(WIDTH)) term_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (o_tdata[(i*WIDTH)+:WIDTH]), + .s_axis_tlast (o_tlast[i]), + .s_axis_tvalid(o_tvalid[i]), + .s_axis_tready(o_tready[i]), + .m_axis_tdata (i_tdata[(i*WIDTH)+:WIDTH]), + .m_axis_tlast (i_tlast[i]), + .m_axis_tvalid(i_tvalid[i]), + .m_axis_tready(i_tready[i]), + .pkts_dropped () + ); + end endgenerate + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v new file mode 100644 index 000000000..fd10d6682 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_ingress_vc_buff.v @@ -0,0 +1,178 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_ingress_vc_buff +// Description: +// A 
wrapper around a buffer to implement one or more virtual channels +// Supports gate a packet for cut-through routing + +module axis_ingress_vc_buff #( + parameter WIDTH = 64, // Width of the datapath + parameter NUM_VCS = 2, // Number of virtual channels + parameter SIZE = 5, // Virtual channel buffer size + parameter ROUTING = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH} + parameter DEST_W = (NUM_VCS > 1) ? $clog2(NUM_VCS) : 1 // PRIVATE +) ( + input wire clk, + input wire reset, + input wire [WIDTH-1:0] s_axis_tdata, + input wire [DEST_W-1:0] s_axis_tdest, + input wire s_axis_tlast, + input wire s_axis_tvalid, + output wire s_axis_tready, + output wire [WIDTH-1:0] m_axis_tdata, + output wire m_axis_tlast, + output wire m_axis_tvalid, + input wire m_axis_tready +); + + generate if (NUM_VCS > 1) begin + //---------------------------------------------------- + // Multiple virtual channels + //---------------------------------------------------- + + wire [(WIDTH*NUM_VCS)-1:0] bufin_tdata , bufout_tdata ; + wire [NUM_VCS-1:0] bufin_tlast , bufout_tlast ; + wire [NUM_VCS-1:0] bufin_tvalid, bufout_tvalid; + wire [NUM_VCS-1:0] bufin_tready, bufout_tready; + + axi_demux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) vc_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (/* unused */), + .dest (s_axis_tdest ), + .i_tdata (s_axis_tdata ), + .i_tlast (s_axis_tlast ), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .o_tdata (bufin_tdata), + .o_tlast (bufin_tlast), + .o_tvalid (bufin_tvalid), + .o_tready (bufin_tready) + ); + + genvar vc; + for (vc = 0; vc < NUM_VCS; vc = vc + 1) begin + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({bufin_tlast[vc], bufin_tdata [(vc*WIDTH)+:WIDTH]}), + .i_tvalid (bufin_tvalid [vc]), + .i_tready (bufin_tready [vc]), + .o_tdata 
({bufout_tlast[vc], bufout_tdata [(vc*WIDTH)+:WIDTH]}), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufin_tdata[(vc*WIDTH)+:WIDTH]), + .i_tlast (bufin_tlast[vc]), + .i_tvalid (bufin_tvalid[vc]), + .i_tready (bufin_tready[vc]), + .i_terror (1'b0), + .o_tdata (bufout_tdata[(vc*WIDTH)+:WIDTH]), + .o_tlast (bufout_tlast[vc]), + .o_tvalid (bufout_tvalid[vc]), + .o_tready (bufout_tready[vc]) + ); + end + end + + axi_mux #( + .WIDTH(WIDTH), .SIZE(NUM_VCS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) vc_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (bufout_tdata ), + .i_tlast (bufout_tlast ), + .i_tvalid (bufout_tvalid), + .i_tready (bufout_tready), + .o_tdata (m_axis_tdata ), + .o_tlast (m_axis_tlast ), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready) + ); + + end else begin + //---------------------------------------------------- + // Single virtual channel + //---------------------------------------------------- + wire [WIDTH-1:0] pipe_tdata; + wire pipe_tlast; + wire pipe_tvalid; + wire pipe_tready; + + if (ROUTING == "WORMHOLE") begin + axi_fifo #( + .WIDTH(WIDTH+1), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({s_axis_tlast, s_axis_tdata}), + .i_tvalid (s_axis_tvalid ), + .i_tready (s_axis_tready ), + .o_tdata ({pipe_tlast, pipe_tdata}), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready), + .space (), + .occupied () + ); + end else begin + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(SIZE) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_tdata), + .i_tlast (s_axis_tlast), + .i_tvalid (s_axis_tvalid), + .i_tready (s_axis_tready), + .i_terror (1'b0), + .o_tdata (pipe_tdata), + .o_tlast (pipe_tlast), + .o_tvalid (pipe_tvalid), + .o_tready (pipe_tready) + ); + end + + axi_fifo #( + 
.WIDTH(WIDTH+1), .SIZE(1) + ) buf_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({pipe_tlast, pipe_tdata}), + .i_tvalid (pipe_tvalid ), + .i_tready (pipe_tready ), + .o_tdata ({m_axis_tlast, m_axis_tdata}), + .o_tvalid (m_axis_tvalid), + .o_tready (m_axis_tready), + .space (), + .occupied () + ); + + end endgenerate + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v new file mode 100644 index 000000000..bf9fa24be --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_port_terminator.v @@ -0,0 +1,44 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_port_terminator +// Description: +// A dummy terminator for unused crossbar ports + +module axis_port_terminator #( + parameter DATA_W = 64 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Input ports + input wire [DATA_W-1:0] s_axis_tdata, // Input data + input wire s_axis_tlast, // Input EOP (last) + input wire s_axis_tvalid, // Input valid + output wire s_axis_tready, // Input ready + // Output ports + output wire [DATA_W-1:0] m_axis_tdata, // Output data + output wire m_axis_tlast, // Output EOP (last) + output wire m_axis_tvalid, // Output valid + input wire m_axis_tready, // Output ready + // Metrics + output reg [15:0] pkts_dropped +); + + assign s_axis_tready = 1'b1; + assign m_axis_tdata = {DATA_W{1'b0}}; + assign m_axis_tlast = 1'b0; + assign m_axis_tvalid = 1'b0; + + always @(posedge clk) begin + if (reset) begin + pkts_dropped <= 'd0; + end else if (s_axis_tvalid & s_axis_tlast & s_axis_tready) begin + pkts_dropped <= pkts_dropped + 'd1; + end + end + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v new file mode 100644 index 000000000..24b9e4129 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/axis_switch.v @@ -0,0 +1,164 @@ 
+// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: axis_switch +// Description: +// Implementation of a M-input, N-output AXI-Stream switch. +// One of the M input ports is allocated based on the s_axis_alloc signal +// and the packet on that port is sent to one of the N output ports based +// on the tdest signal + +module axis_switch #( + parameter DATA_W = 64, // tdata width + parameter DEST_W = 1, // Output tdest width + parameter IN_PORTS = 3, // Number of input ports + parameter OUT_PORTS = 3, // Number of output ports + parameter PIPELINE = 1, // Instantiate output pipeline stage? + parameter ALLOC_W = (IN_PORTS == 1) ? 1 : $clog2(IN_PORTS) //PRIVATE +) ( + // Clocks and resets + input wire clk, // Switch clock + input wire reset, // Reset + // Input ports + input wire [(DATA_W*IN_PORTS)-1:0] s_axis_tdata, // Input data + input wire [((DEST_W+$clog2(OUT_PORTS))*IN_PORTS)-1:0] s_axis_tdest, // Input destination + input wire [IN_PORTS-1:0] s_axis_tlast, // Input EOP (last) + input wire [IN_PORTS-1:0] s_axis_tvalid, // Input valid + output wire [IN_PORTS-1:0] s_axis_tready, // Input ready + input wire [ALLOC_W-1:0] s_axis_alloc, // Input port allocation for switch + // Output ports + output wire [(DATA_W*OUT_PORTS)-1:0] m_axis_tdata, // Output data + output wire [(DEST_W*OUT_PORTS)-1:0] m_axis_tdest, // Output destination + output wire [OUT_PORTS-1:0] m_axis_tlast, // Output EOP (last) + output wire [OUT_PORTS-1:0] m_axis_tvalid, // Output valid + input wire [OUT_PORTS-1:0] m_axis_tready // Output ready +); + // PRIVATE: Vivado synthesizer workaround (cannot be localparam) + localparam CLOG2_IN_PORTS = $clog2(IN_PORTS); + localparam CLOG2_OUT_PORTS = $clog2(OUT_PORTS); + + //--------------------------------------------------------- + // Flatten/unflatten and pipeline + //--------------------------------------------------------- + wire [DATA_W-1:0] i_tdata [0:IN_PORTS-1]; + wire 
[DEST_W+$clog2(OUT_PORTS)-1:0] i_tdest [0:IN_PORTS-1]; + wire i_tlast [0:IN_PORTS-1]; + wire [IN_PORTS-1:0] i_tvalid; + wire [IN_PORTS-1:0] i_tready; + wire [ALLOC_W-1:0] i_alloc; + wire [DATA_W-1:0] o_tdata [0:OUT_PORTS-1]; + wire [DEST_W-1:0] o_tdest [0:OUT_PORTS-1]; + wire o_tlast [0:OUT_PORTS-1]; + wire [OUT_PORTS-1:0] o_tvalid; + wire [OUT_PORTS-1:0] o_tready; + + genvar i, o; + generate + for (i = 0; i < IN_PORTS; i = i + 1) begin: in_ports + assign i_tdata [i] = s_axis_tdata [(i*DATA_W)+:DATA_W]; + assign i_tdest [i] = s_axis_tdest [(i*(DEST_W+CLOG2_OUT_PORTS))+:(DEST_W+CLOG2_OUT_PORTS)]; + assign i_tlast [i] = s_axis_tlast [i]; + assign i_tvalid [i] = s_axis_tvalid[i]; + assign s_axis_tready[i] = i_tready [i]; + end + assign i_alloc = s_axis_alloc; //i_alloc has to be delay matched to valid + + for (o = 0; o < OUT_PORTS; o = o + 1) begin + if (PIPELINE == 1) begin + axi_fifo_flop2 #(.WIDTH(DEST_W+1+DATA_W)) out_pipe_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata({o_tdest[o], o_tlast[o], o_tdata[o]}), + .i_tvalid(o_tvalid[o]), .i_tready(o_tready[o]), + .o_tdata({m_axis_tdest[(o*DEST_W)+:DEST_W], m_axis_tlast[o], m_axis_tdata[(o*DATA_W)+:DATA_W]}), + .o_tvalid(m_axis_tvalid[o]), .o_tready(m_axis_tready[o]), + .space(), .occupied() + ); + end else begin + assign m_axis_tdata [(o*DATA_W)+:DATA_W] = o_tdata [o]; + assign m_axis_tdest [(o*DEST_W)+:DEST_W] = o_tdest [o]; + assign m_axis_tlast [o] = o_tlast [o]; + assign m_axis_tvalid[o] = o_tvalid [o]; + assign o_tready [o] = m_axis_tready[o]; + end + end + endgenerate + + //--------------------------------------------------------- + // Allocator + //--------------------------------------------------------- + // The "chosen" input port will drive this bus + wire [DATA_W-1:0] master_tdata; + wire [DEST_W+$clog2(OUT_PORTS)-1:0] master_tdest; + wire master_tlast; + wire master_tvalid; + wire master_tready; + + generate if (IN_PORTS > 1) begin + reg [IN_PORTS-1:0] ialloc_oh; + reg [$clog2(IN_PORTS)-1:0] 
alloc_reg; + always @(posedge clk) begin + if (reset) begin + ialloc_oh <= {IN_PORTS{1'b0}}; + end else begin + if (ialloc_oh == {IN_PORTS{1'b0}}) begin + if (|i_tvalid) begin + ialloc_oh[i_alloc] <= 1'b1; + alloc_reg <= i_alloc; + end + end else begin + if(master_tready & master_tvalid & master_tlast) + ialloc_oh <= {IN_PORTS{1'b0}}; + end + end + end + + assign master_tdata = i_tdata[alloc_reg]; + assign master_tdest = i_tdest[alloc_reg]; + assign master_tlast = i_tlast[alloc_reg]; + assign master_tvalid = |(i_tvalid & ialloc_oh); + assign i_tready = i_tvalid & ialloc_oh & {IN_PORTS{master_tready}}; + end else begin + // Special case: One input port + assign master_tdata = i_tdata[0]; + assign master_tdest = i_tdest[0]; + assign master_tlast = i_tlast[0]; + assign master_tvalid = i_tvalid[0]; + assign i_tready[0] = master_tready; + end endgenerate + + //--------------------------------------------------------- + // Router + //--------------------------------------------------------- + generate if (OUT_PORTS > 1) begin + reg [OUT_PORTS-1:0] odst_oh; + always @(posedge clk) begin + if (reset) begin + odst_oh <= {OUT_PORTS{1'b0}}; + end else begin + if (odst_oh == {OUT_PORTS{1'b0}}) begin + if (master_tvalid) + odst_oh[master_tdest[CLOG2_OUT_PORTS-1:0]] <= 1'b1; + end else begin + if(master_tready & master_tvalid & master_tlast) + odst_oh <= {OUT_PORTS{1'b0}}; + end + end + end + assign master_tready = |(o_tready & odst_oh); + assign o_tvalid = {OUT_PORTS{master_tvalid}} & odst_oh; + end else begin + // Special case: One output port + assign master_tready = o_tready[0]; + assign o_tvalid[0] = master_tvalid; + end endgenerate + + generate for (o = 0; o < OUT_PORTS; o = o + 1) begin + assign o_tdata[o] = master_tdata; + assign o_tdest[o] = master_tdest[DEST_W+CLOG2_OUT_PORTS-1:CLOG2_OUT_PORTS]; + assign o_tlast[o] = master_tlast; + end endgenerate + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v 
b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v new file mode 100644 index 000000000..79f1a6626 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_crossbar_nxn.v @@ -0,0 +1,381 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_crossbar_nxn +// Description: +// This module implements a full-bandwidth NxN crossbar with N input and output ports +// for CHDR traffic. It supports multiple optimization strategies for performance, +// area and timing tradeoffs. It uses AXI-Stream for all of its links. The crossbar +// has a dynamic routing table based on a Content Addressable Memory (CAM). The SID +// is used to determine the destination of a packet and the routing table contains +// a re-programmable SID to crossbar port mapping. The table is programmed using +// special route config packets on the data input ports or using an optional +// management port. +// The topology, routing algorithms and the router architecture are +// described in README.md in this directory. +// Parameters: +// - CHDR_W: Width of the AXI-Stream data bus +// - NPORTS: Number of ports to instantiate +// - DEFAULT_PORT: The failsafe port to forward a packet to if the SID mapping is missing +// - MTU: log2 of max packet size (in words) +// - ROUTE_TBL_SIZE: log2 of the number of mappings that the routing table can hold +// at any time. Mapping values are maintained in a FIFO fashion. +// - MUX_ALLOC: Algorithm to allocate the egress MUX +// * PRIO: Priority based. 
Lower port numbers have a higher priority +// * ROUND-ROBIN: Round robin input port allocation +// - OPTIMIZE: Optimization strategy for performance vs area vs timing tradeoffs +// * AREA: Attempt to minimize area at the cost of performance (throughput) and/or timing +// * PERFORMANCE: Attempt to maximize performance at the cost of area and/or timing +// * TIMING: Attempt to maximize Fmax at the cost of area and/or performance +// - NPORTS_MGMT: Number of ports with management endpoint. The first NPORTS_MGMT ports will +// have the management port instantiated +// - EXT_RTCFG_PORT: Enable a side-channel AXI-Stream management port to configure the +// routing table +// Signals: +// - s_axis_*: Slave port for router (flattened) +// - m_axis_*: Master port for router (flattened) +// - s_axis_mgmt_*: Management slave port +// - device_id: The ID of the device that has instantiated this module +// + +module chdr_crossbar_nxn #( + parameter [15:0] PROTOVER = {8'd1, 8'd0}, + parameter CHDR_W = 64, + parameter [7:0] NPORTS = 8, + parameter [7:0] DEFAULT_PORT = 0, + parameter MTU = 9, + parameter ROUTE_TBL_SIZE = 6, + parameter MUX_ALLOC = "ROUND-ROBIN", + parameter OPTIMIZE = "AREA", + parameter [7:0] NPORTS_MGMT = NPORTS, + parameter [0:0] EXT_RTCFG_PORT = 0 +) ( + input wire clk, + input wire reset, + // Device info + input wire [15:0] device_id, + // Inputs + input wire [(CHDR_W*NPORTS)-1:0] s_axis_tdata, + input wire [NPORTS-1:0] s_axis_tlast, + input wire [NPORTS-1:0] s_axis_tvalid, + output wire [NPORTS-1:0] s_axis_tready, + // Output + output wire [(CHDR_W*NPORTS)-1:0] m_axis_tdata, + output wire [NPORTS-1:0] m_axis_tlast, + output wire [NPORTS-1:0] m_axis_tvalid, + input wire [NPORTS-1:0] m_axis_tready, + // Router config management port + input wire ext_rtcfg_stb, + input wire [15:0] ext_rtcfg_addr, + input wire [31:0] ext_rtcfg_data, + output wire ext_rtcfg_ack +); + // --------------------------------------------------- + // RFNoC Includes + // 
--------------------------------------------------- + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + localparam NPORTS_W = $clog2(NPORTS); + localparam EPID_W = 16; + localparam [17:0] EXT_INFO = {1'b0, EXT_RTCFG_PORT, NPORTS_MGMT, NPORTS}; + + localparam [0:0] PKT_ST_HEAD = 1'b0; + localparam [0:0] PKT_ST_BODY = 1'b1; + + // The compute_mux_alloc function is the switch allocation function for the MUX + // i.e. it chooses which input port reserves the output MUX for packet transfer. + function [NPORTS_W-1:0] compute_mux_alloc; + input [NPORTS-1:0] pkt_waiting; + input [NPORTS_W-1:0] last_alloc; + reg signed [NPORTS_W:0] i; + begin + compute_mux_alloc = last_alloc; + for (i = NPORTS-1; i >= 0; i=i-1) begin + if (MUX_ALLOC == "PRIO") begin + // Priority. Lower port index gets a higher priority. + if (pkt_waiting[i]) + compute_mux_alloc = i; + end else begin + // Round-robin + if (pkt_waiting[(last_alloc + i + 1) % NPORTS]) + compute_mux_alloc = (last_alloc + i + 1) % NPORTS; + end + end + end + endfunction + + wire [NPORTS-1:0] rtcfg_req_wr; + wire [(16*NPORTS)-1:0] rtcfg_req_addr; + wire [(32*NPORTS)-1:0] rtcfg_req_data; + wire [NPORTS-1:0] rtcfg_resp_ack; + wire [(EPID_W*NPORTS)-1:0] find_tdata; + wire [NPORTS-1:0] find_tvalid; + wire [NPORTS-1:0] find_tready; + wire [(NPORTS_W*NPORTS)-1:0] result_tdata; + wire [NPORTS-1:0] result_tkeep; + wire [NPORTS-1:0] result_tvalid; + wire [NPORTS-1:0] result_tready; + + // Instantiate a single CAM-based routing table that will be shared between all + // input ports. Configuration and lookup is performed using an AXI-Stream iface. + // If multiple packets arrive simultaneously, only the headers of those packets will + // be serialized in order to arbitrate this map. Selection is done round-robin. 
+ chdr_xb_routing_table #( + .SIZE(ROUTE_TBL_SIZE), .NPORTS(NPORTS), + .EXT_INS_PORT_EN(EXT_RTCFG_PORT) + ) routing_tbl_i ( + .clk (clk ), + .reset (reset ), + .port_req_wr (rtcfg_req_wr ), + .port_req_addr (rtcfg_req_addr), + .port_req_data (rtcfg_req_data), + .port_resp_ack (rtcfg_resp_ack), + .ext_req_wr (ext_rtcfg_stb ), + .ext_req_addr (ext_rtcfg_addr), + .ext_req_data (ext_rtcfg_data), + .ext_resp_ack (ext_rtcfg_ack ), + .axis_find_tdata (find_tdata ), + .axis_find_tvalid (find_tvalid ), + .axis_find_tready (find_tready ), + .axis_result_tdata (result_tdata ), + .axis_result_tkeep (result_tkeep ), + .axis_result_tvalid(result_tvalid ), + .axis_result_tready(result_tready ) + ); + + wire [CHDR_W-1:0] i_tdata [0:NPORTS-1]; + wire [9:0] i_tdest [0:NPORTS-1]; + wire [1:0] i_tid [0:NPORTS-1]; + wire i_tlast [0:NPORTS-1]; + wire i_tvalid [0:NPORTS-1]; + wire i_tready [0:NPORTS-1]; + wire [CHDR_W-1:0] buf_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] buf_tdest [0:NPORTS-1], buf_tdest_tmp[0:NPORTS-1]; + wire buf_tkeep [0:NPORTS-1]; + wire buf_tlast [0:NPORTS-1]; + wire buf_tvalid[0:NPORTS-1]; + wire buf_tready[0:NPORTS-1]; + wire [CHDR_W-1:0] swi_tdata [0:NPORTS-1]; + wire [NPORTS_W-1:0] swi_tdest [0:NPORTS-1]; + wire swi_tlast [0:NPORTS-1]; + wire swi_tvalid[0:NPORTS-1]; + wire swi_tready[0:NPORTS-1]; + wire [(CHDR_W*NPORTS)-1:0] swo_tdata [0:NPORTS-1], muxi_tdata [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tlast [0:NPORTS-1], muxi_tlast [0:NPORTS-1]; + wire [NPORTS-1:0] swo_tvalid[0:NPORTS-1], muxi_tvalid[0:NPORTS-1]; + wire [NPORTS-1:0] swo_tready[0:NPORTS-1], muxi_tready[0:NPORTS-1]; + + genvar n, i, j; + generate + for (n = 0; n < NPORTS; n = n + 1) begin: i_ports + // For each input port, first check if we have a management packet + // arriving. If it arrives, the top config commands are extracted, sent to the + // routing table for configuration, and the rest of the packet is forwarded + // down to the router. 
+ if (n < NPORTS_MGMT) begin + chdr_mgmt_pkt_handler #( + .PROTOVER(PROTOVER), .CHDR_W(CHDR_W), .MGMT_ONLY(0) + ) mgmt_ep_i ( + .clk (clk ), + .rst (reset ), + .node_info (chdr_mgmt_build_node_info(EXT_INFO, n, NODE_TYPE_XBAR, device_id)), + .s_axis_chdr_tdata (s_axis_tdata [(n*CHDR_W)+:CHDR_W] ), + .s_axis_chdr_tlast (s_axis_tlast [n] ), + .s_axis_chdr_tvalid (s_axis_tvalid[n] ), + .s_axis_chdr_tready (s_axis_tready[n] ), + .s_axis_chdr_tuser ('d0 ), + .m_axis_chdr_tdata (i_tdata [n] ), + .m_axis_chdr_tdest (i_tdest [n] ), + .m_axis_chdr_tid (i_tid [n] ), + .m_axis_chdr_tlast (i_tlast [n] ), + .m_axis_chdr_tvalid (i_tvalid [n] ), + .m_axis_chdr_tready (i_tready [n] ), + .ctrlport_req_wr (rtcfg_req_wr [n] ), + .ctrlport_req_rd (/* unused */ ), + .ctrlport_req_addr (rtcfg_req_addr[(n*16)+:16] ), + .ctrlport_req_data (rtcfg_req_data[(n*32)+:32] ), + .ctrlport_resp_ack (rtcfg_resp_ack[n] ), + .ctrlport_resp_data (32'h0 /* unused */ ), + .op_stb (/* unused */ ), + .op_dst_epid (/* unused */ ), + .op_src_epid (/* unused */ ), + .op_data (/* unused */ ) + ); + end else begin + assign i_tdata [n] = s_axis_tdata [(n*CHDR_W)+:CHDR_W]; + assign i_tid [n] = CHDR_MGMT_ROUTE_EPID; + assign i_tdest [n] = 10'd0; // Unused + assign i_tlast [n] = s_axis_tlast [n]; + assign i_tvalid [n] = s_axis_tvalid[n]; + assign s_axis_tready[n] = i_tready [n]; + + assign rtcfg_req_wr [n] = 1'b0; + assign rtcfg_req_addr[(n*16)+:16] = 16'h0; + assign rtcfg_req_data[(n*32)+:32] = 32'h0; + end + + // Ingress buffer module that does the following: + // - Stores and gates an incoming packet + // - Looks up destination in routing table and attaches a tdest for the packet + chdr_xb_ingress_buff #( + .WIDTH(CHDR_W), .MTU(MTU), .DEST_W(NPORTS_W), .NODE_ID(n) + ) buf_i ( + .clk (clk ), + .reset (reset ), + .s_axis_chdr_tdata (i_tdata [n] ), + .s_axis_chdr_tdest (i_tdest [n][NPORTS_W-1:0] ), + .s_axis_chdr_tid (i_tid [n] ), + .s_axis_chdr_tlast (i_tlast [n] ), + .s_axis_chdr_tvalid (i_tvalid [n] ), + 
.s_axis_chdr_tready (i_tready [n] ), + .m_axis_chdr_tdata (buf_tdata [n] ), + .m_axis_chdr_tdest (buf_tdest_tmp[n] ), + .m_axis_chdr_tkeep (buf_tkeep [n] ), + .m_axis_chdr_tlast (buf_tlast [n] ), + .m_axis_chdr_tvalid (buf_tvalid [n] ), + .m_axis_chdr_tready (buf_tready [n] ), + .m_axis_find_tdata (find_tdata [(n*EPID_W)+:EPID_W] ), + .m_axis_find_tvalid (find_tvalid [n] ), + .m_axis_find_tready (find_tready [n] ), + .s_axis_result_tdata (result_tdata [(n*NPORTS_W)+:NPORTS_W]), + .s_axis_result_tkeep (result_tkeep [n] ), + .s_axis_result_tvalid(result_tvalid[n] ), + .s_axis_result_tready(result_tready[n] ) + ); + assign buf_tdest[n] = buf_tkeep[n] ? buf_tdest_tmp[n] : DEFAULT_PORT[NPORTS_W-1:0]; + + // Pipeline state + axi_fifo #( + .WIDTH(CHDR_W+1+NPORTS_W), .SIZE(1) + ) pipe_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata ({buf_tlast[n], buf_tdest[n], buf_tdata[n]}), + .i_tvalid (buf_tvalid[n] ), + .i_tready (buf_tready[n] ), + .o_tdata ({swi_tlast[n], swi_tdest[n], swi_tdata[n]}), + .o_tvalid (swi_tvalid[n] ), + .o_tready (swi_tready[n] ), + .space (/* Unused */ ), + .occupied (/* Unused */ ) + ); + + // Ingress demux. 
Use the tdest field to determine packet destination + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(1), .OUT_PORTS(NPORTS), .PIPELINE(1) + ) demux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (swi_tdata [n] ), + .s_axis_tdest ({1'b0, swi_tdest [n]}), + .s_axis_tlast (swi_tlast [n] ), + .s_axis_tvalid (swi_tvalid[n] ), + .s_axis_tready (swi_tready[n] ), + .s_axis_alloc (1'b0 ), + .m_axis_tdata (swo_tdata [n] ), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (swo_tlast [n] ), + .m_axis_tvalid (swo_tvalid[n] ), + .m_axis_tready (swo_tready[n] ) + ); + end + + for (i = 0; i < NPORTS; i = i + 1) begin + for (j = 0; j < NPORTS; j = j + 1) begin + assign muxi_tdata [i][j*CHDR_W+:CHDR_W] = swo_tdata [j][i*CHDR_W+:CHDR_W]; + assign muxi_tlast [i][j] = swo_tlast [j][i]; + assign muxi_tvalid[i][j] = swo_tvalid [j][i]; + assign swo_tready [i][j] = muxi_tready[j][i]; + end + end + + for (n = 0; n < NPORTS; n = n + 1) begin: o_ports + if (OPTIMIZE == "PERFORMANCE") begin + // Use the axis_switch module when optimizing for performance + // This logic has some extra levels of logic to ensure + // that the switch allocation happens in 0 clock cycles which + // means that Fmax for this implementation will be lower. + + wire mux_ready = |muxi_tready[n]; // Max 1 bit should be high + wire mux_valid = |muxi_tvalid[n]; + wire mux_last = |(muxi_tvalid[n] & muxi_tlast[n]); + + // Track the input packet state + reg [0:0] pkt_state = PKT_ST_HEAD; + always @(posedge clk) begin + if (reset) begin + pkt_state <= PKT_ST_HEAD; + end else if (mux_valid & mux_ready) begin + pkt_state <= mux_last ? PKT_ST_HEAD : PKT_ST_BODY; + end + end + + // The switch requires the allocation to stay valid until the + // end of the packet. 
We also might need to keep the previous + // packet's allocation to compute the current one + reg [NPORTS_W-1:0] prev_sw_alloc = {NPORTS_W{1'b0}}; + reg [NPORTS_W-1:0] pkt_sw_alloc = {NPORTS_W{1'b0}}; + wire [NPORTS_W-1:0] muxi_sw_alloc = (mux_valid && pkt_state == PKT_ST_HEAD) ? + compute_mux_alloc(muxi_tvalid[n], prev_sw_alloc) : pkt_sw_alloc; + + always @(posedge clk) begin + if (reset) begin + prev_sw_alloc <= {NPORTS_W{1'b0}}; + pkt_sw_alloc <= {NPORTS_W{1'b0}}; + end else if (mux_valid & mux_ready) begin + if (pkt_state == PKT_ST_HEAD) + pkt_sw_alloc <= muxi_sw_alloc; + if (mux_last) + prev_sw_alloc <= muxi_sw_alloc; + end + end + + axis_switch #( + .DATA_W(CHDR_W), .DEST_W(1), .IN_PORTS(NPORTS), .OUT_PORTS(1), + .PIPELINE(0) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .s_axis_tdata (muxi_tdata [n] ), + .s_axis_tdest ({NPORTS{1'b0}} /* Unused */ ), + .s_axis_tlast (muxi_tlast [n] ), + .s_axis_tvalid (muxi_tvalid[n] ), + .s_axis_tready (muxi_tready[n] ), + .s_axis_alloc (muxi_sw_alloc ), + .m_axis_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .m_axis_tdest (/* Unused */ ), + .m_axis_tlast (m_axis_tlast [n] ), + .m_axis_tvalid (m_axis_tvalid[n] ), + .m_axis_tready (m_axis_tready[n] ) + ); + end else begin + // axi_mux has an additional bubble cycle but the logic + // to allocate an input port has fewer levels and takes + // up fewer resources. + axi_mux #( + .PRIO(MUX_ALLOC == "PRIO"), .WIDTH(CHDR_W), .SIZE(NPORTS), + .PRE_FIFO_SIZE(OPTIMIZE == "TIMING" ? 
1 : 0), .POST_FIFO_SIZE(1) + ) mux_i ( + .clk (clk ), + .reset (reset ), + .clear (1'b0 ), + .i_tdata (muxi_tdata [n] ), + .i_tlast (muxi_tlast [n] ), + .i_tvalid (muxi_tvalid [n] ), + .i_tready (muxi_tready [n] ), + .o_tdata (m_axis_tdata [(n*CHDR_W)+:CHDR_W]), + .o_tlast (m_axis_tlast [n] ), + .o_tvalid (m_axis_tvalid[n] ), + .o_tready (m_axis_tready[n] ) + ); + end + end + endgenerate + + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v new file mode 100644 index 000000000..dcb11da8e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_ingress_buff.v @@ -0,0 +1,259 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_ingress_buff +// +// Description: +// +// Ingress buffer module for the CHDR crossbar. This module stores and gates +// the incoming packet and simultaneously determines the destination (TDEST) +// by inspecting the incoming TID. If the TID is CHDR_MGMT_ROUTE_EPID then we +// perform a lookup on the destination EPID to determine the correct output for TDEST. 
+// +// Parameters: +// +// WIDTH : Data width of the CHDR interfaces (TDATA) +// MTU : Maximum transmission unit, in WIDTH-sized words, is 2**MTU +// DEST_W : Width of the destination routing information (TDEST) +// NODE_ID : Numeric identifier for this port +// + +module chdr_xb_ingress_buff #( + parameter WIDTH = 64, + parameter MTU = 5, + parameter DEST_W = 4, + parameter [9:0] NODE_ID = 0 +) ( + input wire clk, + input wire reset, + // CHDR input port + input wire [WIDTH-1:0] s_axis_chdr_tdata, + input wire [DEST_W-1:0] s_axis_chdr_tdest, + input wire [1:0] s_axis_chdr_tid, + input wire s_axis_chdr_tlast, + input wire s_axis_chdr_tvalid, + output wire s_axis_chdr_tready, + // CHDR output port (with a tdest and tkeep) + output wire [WIDTH-1:0] m_axis_chdr_tdata, + output wire [DEST_W-1:0] m_axis_chdr_tdest, + output wire m_axis_chdr_tkeep, + output wire m_axis_chdr_tlast, + output wire m_axis_chdr_tvalid, + input wire m_axis_chdr_tready, + // Find port going to routing table + output wire [15:0] m_axis_find_tdata, + output wire m_axis_find_tvalid, + input wire m_axis_find_tready, + // Result port from routing table + input wire [DEST_W-1:0] s_axis_result_tdata, + input wire s_axis_result_tkeep, + input wire s_axis_result_tvalid, + output wire s_axis_result_tready +); + + // RFNoC Includes + `include "../core/rfnoc_chdr_utils.vh" + `include "../core/rfnoc_chdr_internal_utils.vh" + + + //--------------------------------------------------------------------------- + // Packet Buffer + //--------------------------------------------------------------------------- + + wire [WIDTH-1:0] gate_i_tdata , gate_o_tdata ; + wire gate_i_tlast , gate_o_tlast ; + wire gate_i_tvalid, gate_o_tvalid; + wire gate_i_tready, gate_o_tready; + + // The axi_packet_gate queues up an entire packet before letting it go out. + // This reduces congestion in the crossbar for slowly-built packets. 
+ axi_packet_gate #( + .WIDTH (WIDTH), + .SIZE (MTU) + ) axi_packet_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (gate_i_tdata), + .i_tlast (gate_i_tlast), + .i_terror (1'b0), + .i_tvalid (gate_i_tvalid), + .i_tready (gate_i_tready), + .o_tdata (gate_o_tdata), + .o_tlast (gate_o_tlast), + .o_tvalid (gate_o_tvalid), + .o_tready (gate_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Destination (TDEST) Muxing + //--------------------------------------------------------------------------- + + wire [15:0] find_tdata; + wire find_tvalid, find_tready; + + wire [DEST_W-1:0] dest_i_tdata; + wire dest_i_tkeep, dest_i_tvalid, dest_i_tready; + wire [DEST_W-1:0] dest_o_tdata; + wire dest_o_tkeep, dest_o_tvalid, dest_o_tready; + + // The find_fifo holds the lookup requests from the find_* AXI stream and + // sends them on to the m_axis_find_* stream port. It is required because the + // input logic (see below) doesn't obey the AXI handshake protocol but this + // FIFO can tolerate it. + axi_fifo #( + .WIDTH (16), + .SIZE (1) + ) find_fifo_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (find_tdata), + .i_tvalid (find_tvalid), + .i_tready (find_tready), + .o_tdata (m_axis_find_tdata), + .o_tvalid (m_axis_find_tvalid), + .o_tready (m_axis_find_tready), + .space (), + .occupied () + ); + + // The destination (TDEST) can come from two sources: Directly from the + // packet info (in which case TDEST was immediately determined and comes in + // on dest_* AXI stream) or via a lookup (in which case the result comes in + // on s_axis_result_*). Only one of these data paths is used at a time, so we + // mux them together here to create a single stream (dest_o_*) that contains the + // destination for the next packet. 
+ axi_mux #( + .WIDTH (DEST_W+1), + .SIZE (2), + .PRIO (1), + .PRE_FIFO_SIZE (1), + .POST_FIFO_SIZE (1) + ) dest_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({dest_i_tkeep, dest_i_tdata, + s_axis_result_tkeep, s_axis_result_tdata}), + .i_tlast (2'b11), + .i_tvalid ({dest_i_tvalid, s_axis_result_tvalid}), + .i_tready ({dest_i_tready, s_axis_result_tready}), + .o_tdata ({dest_o_tkeep, dest_o_tdata}), + .o_tlast (), + .o_tvalid (dest_o_tvalid), + .o_tready (dest_o_tready) + ); + + + //--------------------------------------------------------------------------- + // Input Logic + //--------------------------------------------------------------------------- + // + // When a packet comes in, we may have to do one of the following: + // 1) Look up the TDEST using the EPID + // 2) Use the specified input TDEST + // 3) Use the NODE_ID as the TDEST (to return the packet) + // + //--------------------------------------------------------------------------- + + // The s_axis_chdr_hdr_valid signal indicates when TDATA and TID contain the + // header information for the current packet. + reg s_axis_chdr_hdr_valid = 1'b1; + + always @(posedge clk) begin + if (reset) begin + s_axis_chdr_hdr_valid <= 1'b1; + end else if (s_axis_chdr_tvalid & s_axis_chdr_tready) begin + s_axis_chdr_hdr_valid <= s_axis_chdr_tlast; + end + end + + // The dest_find_tready signal indicates if the find_fifo is ready or if the + // dest port of the dest_mux is ready, depending on which path will be used. 
+ reg dest_find_tready; + + always @(*) begin + if (s_axis_chdr_hdr_valid) begin + case (s_axis_chdr_tid) + CHDR_MGMT_ROUTE_EPID: + dest_find_tready = find_tready; + CHDR_MGMT_ROUTE_TDEST: + dest_find_tready = dest_i_tready; + CHDR_MGMT_RETURN_TO_SRC: + dest_find_tready = dest_i_tready; + default: + dest_find_tready = dest_i_tready; // We should never get here + endcase + end else begin + dest_find_tready = 1'b1; + end + end + + // We can accept a transfer from the input CHDR stream only if the packet + // gate and dest/find datapaths are ready. + assign s_axis_chdr_tready = s_axis_chdr_tvalid && + gate_i_tready && + dest_find_tready; + + // The chdr_header_stb signal indicates when we write data into the dest/find + // data path. This happens when we're accepting the header word of the packet + // into the packet gate. + wire chdr_header_stb = s_axis_chdr_tvalid && + s_axis_chdr_tready && + s_axis_chdr_hdr_valid; + + // ************************************************************************** + // WARNING: The logic below violates AXI-Stream by having a tready -> tvalid + // dependency. To ensure no deadlocks, we must place FIFOs downstream + // of gate_i_*, find_* and dest_i_*. + + // Here we decide if we need to do a lookup using the find_* path or if the + // destination is known and can be put directly on the dest_* path. + // + // Start a lookup request if the TID is CHDR_MGMT_ROUTE_EPID. + assign find_tdata = chdr_get_dst_epid(s_axis_chdr_tdata[63:0]); + assign find_tvalid = chdr_header_stb && + (s_axis_chdr_tid == CHDR_MGMT_ROUTE_EPID); + // Set TDEST directly if TID is CHDR_MGMT_ROUTE_TDEST or + // CHDR_MGMT_RETURN_TO_SRC. + assign dest_i_tdata = (s_axis_chdr_tid == CHDR_MGMT_ROUTE_TDEST) ? 
+ s_axis_chdr_tdest : NODE_ID[DEST_W-1:0]; + assign dest_i_tkeep = 1'b1; + assign dest_i_tvalid = chdr_header_stb && + (s_axis_chdr_tid != CHDR_MGMT_ROUTE_EPID); + + // Input logic for axi_packet_gate + assign gate_i_tdata = s_axis_chdr_tdata; + assign gate_i_tlast = s_axis_chdr_tlast; + assign gate_i_tvalid = s_axis_chdr_tready && s_axis_chdr_tvalid; + + // + // ************************************************************************** + + + //--------------------------------------------------------------------------- + // Output Logic + //--------------------------------------------------------------------------- + // + // The destination for the packet (TDEST) must be valid before we allow the + // header of the packet to pass through. So the packet must be blocked until + // the output of the dest_o_* is valid. TDEST and TKEEP must remain valid + // until the end of the packet. + // + //--------------------------------------------------------------------------- + + assign m_axis_chdr_tdata = gate_o_tdata; + assign m_axis_chdr_tlast = gate_o_tlast; + assign m_axis_chdr_tdest = dest_o_tdata; + assign m_axis_chdr_tkeep = dest_o_tkeep; + assign m_axis_chdr_tvalid = gate_o_tvalid && dest_o_tvalid; + + assign gate_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready; + assign dest_o_tready = m_axis_chdr_tvalid && m_axis_chdr_tready && m_axis_chdr_tlast; + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v new file mode 100644 index 000000000..f445efc68 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/chdr_xb_routing_table.v @@ -0,0 +1,122 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: chdr_xb_routing_table +// Description: +// A routing table for the CHDR crossbar. This table is designed +// to be shared between all ports. 
It has an AXI-Stream lookup +// interface and a ctrlport (reduced) configuration interface. + +module chdr_xb_routing_table #( + parameter SIZE = 6, + parameter NPORTS = 4, + parameter EXT_INS_PORT_EN = 1 +) ( + // Clocks and resets + input wire clk, + input wire reset, + // Insertion Interface (for XB ports) + input wire [NPORTS-1:0] port_req_wr, + input wire [(16*NPORTS)-1:0] port_req_addr, + input wire [(32*NPORTS)-1:0] port_req_data, + output wire [NPORTS-1:0] port_resp_ack, + // Insertion Interface (External) + input wire ext_req_wr, + input wire [15:0] ext_req_addr, + input wire [31:0] ext_req_data, + output wire ext_resp_ack, + // Find Interface + input wire [(16*NPORTS)-1:0] axis_find_tdata, + input wire [NPORTS-1:0] axis_find_tvalid, + output wire [NPORTS-1:0] axis_find_tready, + // Result Interface (for Find) + output wire [($clog2(NPORTS)*NPORTS)-1:0] axis_result_tdata, + output wire [NPORTS-1:0] axis_result_tkeep, + output wire [NPORTS-1:0] axis_result_tvalid, + input wire [NPORTS-1:0] axis_result_tready +); + localparam NPORTS_W = $clog2(NPORTS); + localparam CFG_W = NPORTS_W + 16; + localparam CFG_PORTS = NPORTS + EXT_INS_PORT_EN; + + // CAM-based lookup table + + wire [15:0] insert_tdest; + wire [NPORTS_W-1:0] insert_tdata; + wire insert_tvalid; + wire insert_tready; + + axis_muxed_kv_map #( + .KEY_WIDTH(16), .VAL_WIDTH(NPORTS_W), + .SIZE(SIZE), .NUM_PORTS(NPORTS) + ) kv_map_i ( + .clk (clk ), + .reset (reset ), + .axis_insert_tdata (insert_tdata ), + .axis_insert_tdest (insert_tdest ), + .axis_insert_tvalid(insert_tvalid ), + .axis_insert_tready(insert_tready ), + .axis_find_tdata (axis_find_tdata ), + .axis_find_tvalid (axis_find_tvalid ), + .axis_find_tready (axis_find_tready ), + .axis_result_tdata (axis_result_tdata ), + .axis_result_tkeep (axis_result_tkeep ), + .axis_result_tvalid(axis_result_tvalid), + .axis_result_tready(axis_result_tready) + ); + + // Logic to convert from ctrlport to AXI-Stream + + wire ins_req_wr [0:CFG_PORTS-1]; + wire 
[15:0] ins_req_addr[0:CFG_PORTS-1]; + wire [NPORTS_W-1:0] ins_req_data[0:CFG_PORTS-1]; + wire ins_resp_ack[0:CFG_PORTS-1]; + + reg [(CFG_PORTS*CFG_W)-1:0] cfg_tdata; + reg [CFG_PORTS-1:0] cfg_tvalid = {CFG_PORTS{1'b0}}; + wire [CFG_PORTS-1:0] cfg_tready; + + genvar i; + generate for (i = 0; i < CFG_PORTS; i=i+1) begin + assign ins_req_wr [i] = (i < NPORTS) ? port_req_wr[i] : ext_req_wr; + assign ins_req_addr[i] = (i < NPORTS) ? port_req_addr[i*16 +: 16] : ext_req_addr; + assign ins_req_data[i] = (i < NPORTS) ? port_req_data[i*32 +: NPORTS_W] : ext_req_data[NPORTS_W-1:0]; + if (i < NPORTS) + assign port_resp_ack[i] = ins_resp_ack[i]; + else + assign ext_resp_ack = ins_resp_ack[i]; + + always @(posedge clk) begin + if (reset) begin + cfg_tvalid[i] <= 1'b0; + end else begin + if (~cfg_tvalid[i]) begin + if (ins_req_wr[i]) begin + cfg_tvalid[i] <= 1'b1; + cfg_tdata[(CFG_W*i) +: CFG_W] <= {ins_req_data[i], ins_req_addr[i]}; + end + end else begin + cfg_tvalid[i] <= ~cfg_tready[i]; + end + end + end + assign ins_resp_ack[i] = cfg_tvalid[i] & cfg_tready[i]; + end endgenerate + + // Multiplexer between XB ports and external cfg. i_tlast carries one bit per + // mux input (SIZE = CFG_PORTS), not one bit per data bit; every config word + // is presented as a single-beat packet, so all CFG_PORTS bits are tied high. 
+ + axi_mux #( + .WIDTH(CFG_W), .SIZE(CFG_PORTS), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) rtcfg_mux_i ( + .clk(clk), .reset(reset), .clear(1'b0), + .i_tdata(cfg_tdata), .i_tlast({CFG_PORTS{1'b1}}), + .i_tvalid(cfg_tvalid), .i_tready(cfg_tready), + .o_tdata({insert_tdata, insert_tdest}), .o_tlast(), + .o_tvalid(insert_tvalid), .o_tready(insert_tready) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile new file mode 100644 index 000000000..7fa7ae03b --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/Makefile @@ -0,0 +1,52 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = 
$(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +TB_TOP_MODULE ?= crossbar_tb +SIM_TOP = $(TB_TOP_MODULE) + +SIM_SRCS = \ +$(abspath chdr_traffic_source_sim.sv) \ +$(abspath chdr_traffic_sink_sim.sv) \ +$(abspath crossbar_tb.sv) \ +$(abspath $(TB_TOP_MODULE).sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..0f1a10a6e --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = axis_ctrl_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath axis_ctrl_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique 
target to simulate +# e.g. xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv new file mode 100644 index 000000000..fa112f5cb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/axis_ctrl_crossbar_nxn_tb/axis_ctrl_crossbar_nxn_tb.sv @@ -0,0 +1,26 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later + + +`timescale 1ns/1ps + +module axis_ctrl_crossbar_nxn_tb(); + crossbar_tb #( + .TEST_NAME ("axis_ctrl_crossbar_nxn_tb"), + .ROUTER_IMPL ("axis_ctrl_2d_torus" ), // Router implementation + .ROUTER_PORTS (20 ), // Number of ports + .ROUTER_DWIDTH (64 ), // Router datapath width + .MTU_LOG2 (5 ), // log2 of max packet size for router + .NUM_MASTERS (4 ), // Number of data generators in test + .TEST_MAX_PACKETS (100 ), // How many packets to stream per test case? + .TEST_LPP (20 ), // Lines per packet + .TEST_MIN_INJ_RATE (10 ), // Minimum injection rate to test + .TEST_MAX_INJ_RATE (40 ), // Maximum injection rate to test + .TEST_INJ_RATE_INCR (10 ), // Injection rate increment + .TEST_GEN_LL_FILES (0 ) // Generate files to produce load-latency graphs? 
+ ) impl ( + /* no IO */ + ); +endmodule diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile new file mode 100644 index 000000000..399515640 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/chdr_crossbar_nxn_tb/Makefile @@ -0,0 +1,51 @@ +# +# Copyright 2015 Ettus Research LLC +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Define part using PART_ID (<device>/<package>/<speedgrade>) +ARCH = kintex7 +PART_ID = xc7k410t/ffg900/-2 + +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/control/Makefile.srcs +include $(BASE_DIR)/../lib/fifo/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/crossbar/Makefile.srcs +include $(BASE_DIR)/../lib/rfnoc/core/Makefile.srcs + +DESIGN_SRCS = $(abspath \ +$(FIFO_SRCS) \ +$(CONTROL_LIB_SRCS) \ +$(RFNOC_XBAR_SRCS) \ +$(RFNOC_CORE_SRCS) \ +) + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +# Define only one toplevel module +SIM_TOP = chdr_crossbar_nxn_tb + +SIM_SRCS = \ +$(abspath chdr_crossbar_nxn_tb.sv) \ +$(abspath ../crossbar_tb.sv) \ +$(abspath ../chdr_traffic_source_sim.sv) \ +$(abspath ../chdr_traffic_sink_sim.sv) + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_crossbar_nxn_tb
// Description:
//   Port-less simulation top level. Instantiates the generic crossbar_tb
//   with ROUTER_IMPL set to "chdr_crossbar_nxn" and a data-traffic sized
//   workload (long packets, high injection rates, every port a master).

`timescale 1ns/1ps

module chdr_crossbar_nxn_tb();

  // ---- Router configuration ----
  localparam ROUTER_IMPL   = "chdr_crossbar_nxn"; // Router implementation
  localparam ROUTER_PORTS  = 10;                  // Number of ports
  localparam ROUTER_DWIDTH = 64;                  // Router datapath width
  localparam MTU_LOG2      = 7;                   // log2 of max packet size for router
  localparam NUM_MASTERS   = 10;                  // Number of data generators in test

  // ---- Workload configuration ----
  localparam TEST_MAX_PACKETS   = 100;            // How many packets to stream per test case?
  localparam TEST_LPP           = 100;            // Lines per packet
  localparam TEST_MIN_INJ_RATE  = 60;             // Minimum injection rate to test
  localparam TEST_MAX_INJ_RATE  = 100;            // Maximum injection rate to test
  localparam TEST_INJ_RATE_INCR = 10;             // Injection rate increment
  localparam TEST_GEN_LL_FILES  = 0;              // Generate files to produce load-latency graphs?

  // The shared testbench does all the work; it has no IO to connect.
  crossbar_tb #(
    .TEST_NAME          ("chdr_crossbar_nxn_tb"),
    .ROUTER_IMPL        (ROUTER_IMPL),
    .ROUTER_PORTS       (ROUTER_PORTS),
    .ROUTER_DWIDTH      (ROUTER_DWIDTH),
    .MTU_LOG2           (MTU_LOG2),
    .NUM_MASTERS        (NUM_MASTERS),
    .TEST_MAX_PACKETS   (TEST_MAX_PACKETS),
    .TEST_LPP           (TEST_LPP),
    .TEST_MIN_INJ_RATE  (TEST_MIN_INJ_RATE),
    .TEST_MAX_INJ_RATE  (TEST_MAX_INJ_RATE),
    .TEST_INJ_RATE_INCR (TEST_INJ_RATE_INCR),
    .TEST_GEN_LL_FILES  (TEST_GEN_LL_FILES)
  ) impl ();

endmodule
//   Accepts packets and computes the following metrics:
//   - Data integrity errors
//   - Packet latency
//   - Throughput counts
//   All metrics can optionally be written to a file to
//   generate load-latency graphs.
//
//   A session starts when start_stb is seen high on a clock edge and ends
//   once the input bus has been idle (tvalid low) for more than
//   IDLE_TIMEOUT consecutive clock edges. While a session is in progress
//   session_active is high and the counters are live.
//
// Parameters:
//   WIDTH     : Width of the AXI-Stream data bus
//   MTU       : Used only to size IDLE_TIMEOUT (see below)
//   NODE_ID   : Expected destination SID of every packet received here
//   NUM_NODES : Not referenced inside this module
//   FILE_PATH : Directory for the per-packet CSV log. "" disables logging
//   FLUSH_N   : Extra log2 margin added to the idle timeout

`timescale 1ns/1ps

`include "sim_cvita_lib.svh"

module chdr_traffic_sink_sim #(
  parameter        WIDTH     = 64,
  parameter        MTU       = 5,
  parameter [15:0] NODE_ID   = 'd0,
  parameter [15:0] NUM_NODES = 'd16,
  parameter        FILE_PATH = ".",
  parameter        FLUSH_N   = 4
) (
  // Clocks and resets
  input             clk,
  input             rst,              // Not referenced inside this module
  // Settings
  input [63:0]      current_time,     // Global timebase, compared against the packet timestamp
  input             start_stb,        // Starts a new consume session
  input [7:0]       injection_rate,   // Only used to build the log file name
  input [15:0]      lines_per_pkt,    // Expected packet length in lines (payload = lines_per_pkt-2)
  input [7:0]       traffic_patt,     // Only used to build the log file name (printed with %c)
  // CHDR slave (input) interface
  input [WIDTH-1:0] s_axis_tdata,
  input             s_axis_tlast,
  input             s_axis_tvalid,
  output            s_axis_tready,
  // Metrics
  output            session_active,   // High while a session is running
  output [31:0]     xfer_count,       // Lines received (pkt_count * lines_per_pkt)
  output [31:0]     pkt_count,        // Packets received
  output [31:0]     data_err_count,   // Packets with a size or payload mismatch
  output [31:0]     route_err_count   // Packets whose dst_sid != NODE_ID
);

  // Constants
  // Per-packet error flags. They are summed into "err" and written to the
  // Error column of the CSV log. ERR_BIT_PKT_SEQUENCE_ERR is declared but
  // never raised by the checks below.
  localparam integer ERR_BIT_PKT_SIZE_MISMATCH = 1;
  localparam integer ERR_BIT_PKT_DATA_MISMATCH = 2;
  localparam integer ERR_BIT_PKT_DEST_MISMATCH = 4;
  localparam integer ERR_BIT_PKT_SEQUENCE_ERR  = 8;

  // Bus functional model that receives the packets. Its tasks (reset,
  // pull_pkt) come from sim_cvita_lib.svh.
  cvita_slave #(.DWIDTH(WIDTH)) s_chdr (.clk(clk));
  cvita_pkt_t pkt;

  assign s_chdr.axis.tdata  = s_axis_tdata;
  assign s_chdr.axis.tlast  = s_axis_tlast;
  assign s_chdr.axis.tvalid = s_axis_tvalid;
  assign s_axis_tready      = s_chdr.axis.tready;

  // Session state and metric counters (cleared at the start of each session)
  logic        running = 0;
  integer      num_data_errs = 0;
  integer      num_route_errs = 0;
  logic [31:0] num_pkts_xferd = 0;
  logic [31:0] num_samps_xferd = 0;

  assign data_err_count  = num_data_errs;
  assign route_err_count = num_route_errs;
  assign xfer_count      = num_samps_xferd;
  assign pkt_count       = num_pkts_xferd;
  assign session_active  = running;

  integer session = 0;            // Session number, part of the log file name
  string  filename;
  integer handle = 0;             // Log file handle; stays 0 when logging is disabled
  integer err = 0;                // Error flags for the packet being checked
  integer bus_idle_cnt = 0;       // Consecutive clock edges seen with tvalid low
  logic [WIDTH-1:0] i;            // Payload index; same width as a payload word for the compare

  // Egress buff in source is MTU + 4
  // The timeout is 2^(MTU + 4 + FLUSH_N) cycles, i.e. 2^FLUSH_N times that
  // buffer depth.
  localparam integer IDLE_TIMEOUT = (1 << (MTU + 4 + FLUSH_N));

  initial begin: consume_blk
    // Consume infinitely
    s_chdr.reset();
    while (1) begin
      // A session begins on the posedge of start_stb
      while (~start_stb) @(posedge clk);
      session = session + 1;
      // One log file per node/session; the name encodes the test settings
      $sformat(filename, "%s/pkts_node%05d_inj%03d_lpp%05d_traffic%c_sess%04d.csv",
        FILE_PATH, NODE_ID, injection_rate, lines_per_pkt, traffic_patt, session);
      if (FILE_PATH != "") begin
        handle = $fopen(filename, "w");
        if (handle == 0) begin
          $error("Could not open file: %s", filename);
          $finish();
        end
      end
      if (handle != 0) $fdisplay(handle, "Src,Dst,Seqno,Error,Latency");
      s_chdr.reset();
      num_data_errs = 0;
      num_route_errs = 0;
      num_pkts_xferd = 0;
      num_samps_xferd = 0;
      bus_idle_cnt = 0;
      running = 1;
      while (1) begin
        // Pull packet from bus
        err = 0;
        // No data pending: count an idle cycle and end the session once the
        // bus has been quiet for more than IDLE_TIMEOUT cycles.
        if (~s_chdr.axis.tvalid[0]) begin
          @(posedge clk);
          bus_idle_cnt = bus_idle_cnt + 1;
          if (bus_idle_cnt <= IDLE_TIMEOUT)
            continue;
          else
            break;
        end
        s_chdr.pull_pkt(pkt, 0);
        bus_idle_cnt = 0;
        num_pkts_xferd  = num_pkts_xferd + 1;
        // Counts the expected lines per packet, not the lines actually received
        num_samps_xferd = num_samps_xferd + lines_per_pkt;
        // Validate packet
        if (pkt.hdr.dst_sid != NODE_ID) begin
          err = err + ERR_BIT_PKT_DEST_MISMATCH;
          num_route_errs = num_route_errs + 1;
        end
        // Payload is expected to be lines_per_pkt-2 words holding 0, 1, 2, ...
        if (pkt.payload.size() != lines_per_pkt-2) begin
          err = err + ERR_BIT_PKT_SIZE_MISMATCH;
          num_data_errs = num_data_errs + 1;
        end else begin
          for (i = 'd0; i < (lines_per_pkt-2); i=i+1) begin
            if (pkt.payload[i] != i) begin
              // Only the first mismatching word is counted per packet
              err = err + ERR_BIT_PKT_DATA_MISMATCH;
              num_data_errs = num_data_errs + 1;
              break;
            end
          end
        end
        // Latency = time now minus the timestamp carried in the packet header
        if (handle != 0) $fdisplay(handle, "%00d,%00d,%00d,%00d,%00d",
          pkt.hdr.src_sid, pkt.hdr.dst_sid, pkt.hdr.seqnum, err, (current_time - pkt.hdr.timestamp));
      end
      running = 0;
      if (handle != 0) $fclose(handle);
    end
  end

endmodule
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: chdr_traffic_source_sim
// Description:
//   A traffic generator for CHDR traffic. Simulation only.
//   Supports multiple traffic patterns and injection rates.
//
//   Data path: packet generator (m_chdr) -> short FIFO -> timestamp insert
//   + random throttle -> packet gate -> m_axis_*. The timestamp line of
//   every packet is overwritten with current_time as it passes the
//   throttle, so a receiver can compute latency from it.
//

`timescale 1ns/1ps

`include "sim_cvita_lib.svh"

module chdr_traffic_source_sim #(
  parameter        WIDTH     = 64,    // Width of the AXI-Stream data bus
  parameter        MTU       = 5,     // log2 of the max number of lines in a packet
  parameter [15:0] NODE_ID   = 'd0,   // Node ID for this generator
  parameter [15:0] NUM_NODES = 'd16   // Total number of generators in the application
) (
  // Clocks and resets
  input               clk,              // AXI-Stream clock
  input               rst,              // AXI-Stream reset
  // Settings
  input [63:0]        current_time,     // The current value of the global timebase (synch to clk)
  input               start_stb,        // A strobe that indicates the start of a generation session
  input [7:0]         injection_rate,   // The inject rate (in percent) to simulate
  input [15:0]        lines_per_pkt,    // Number of lines per packet to generate
  input [7:0]         traffic_patt,     // The traffic pattern (see localparams below for values)
  input [31:0]        num_pkts_to_send, // Number of packets to send
  // CHDR master interface
  output [WIDTH-1:0]  m_axis_tdata,     // AXI-Stream master tdata
  output              m_axis_tlast,     // AXI-Stream master tlast
  output              m_axis_tvalid,    // AXI-Stream master tvalid
  input               m_axis_tready,    // AXI-Stream master tready
  // Metrics
  output              session_active,   // Signal indicating if generation session is active
  output [63:0]       session_duration, // Session duration (only valid after session ends)
  output [31:0]       xfer_count,       // Number of lines transferred (only valid after session ends)
  output [31:0]       pkt_count         // Number of packets transferred (only valid after session ends)
);
  // **** Supported Traffic Patterns ****
  // Each value is the ASCII code of the letter in the trailing comment.
  localparam [7:0] TRAFFIC_PATT_LOOPBACK       = 8'd76;  //L
  localparam [7:0] TRAFFIC_PATT_NEIGHBOR       = 8'd78;  //N
  localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67;  //C
  localparam [7:0] TRAFFIC_PATT_SEQUENTIAL     = 8'd83;  //S
  localparam [7:0] TRAFFIC_PATT_UNIFORM        = 8'd85;  //U
  localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79;  //O
  localparam [7:0] TRAFFIC_PATT_RANDOM_PERM    = 8'd82;  //R

  cvita_master #(.DWIDTH(WIDTH)) m_chdr (.clk(clk));
  axis_t #(.DWIDTH(WIDTH)) post_fifo (.clk(clk));
  axis_t #(.DWIDTH(WIDTH)) pre_gate (.clk(clk));
  cvita_hdr_t header;
  reg throttle = 1'b1;

  logic        running = 0;
  logic [31:0] curr_pkt_num = 'd0;
  logic [31:0] num_samps_xferd = 'd0;
  logic [63:0] start_time = 0;
  logic [63:0] stop_time = 0;
  // Starts one below NODE_ID so the first SEQUENTIAL destination is NODE_ID
  logic [15:0] last_gen_sid = (NODE_ID - 16'd1);

  assign xfer_count       = num_samps_xferd;
  assign pkt_count        = curr_pkt_num;
  assign session_duration = (stop_time - start_time);
  assign session_active   = running;

  // Utility function to assign SIDs based on traffic pattern
  //   traffic_patt : one of the TRAFFIC_PATT_* codes
  //   last_sid     : destination used for the previous packet (SEQUENTIAL only)
  // Unknown patterns and the unimplemented RANDOM_PERM pattern return 0.
  function [15:0] gen_dst_sid;
    input [7:0]   traffic_patt;
    input [15:0]  last_sid;
    // Declared without an initializer and assigned inside the branch: this
    // function is static, so an initializer in the declaration would not be
    // re-evaluated on every call.
    logic [31:0]  rnum;
    begin
      if (traffic_patt == TRAFFIC_PATT_UNIFORM) begin
        gen_dst_sid = $urandom_range('d0, NUM_NODES-'d1);
      end else if (traffic_patt == TRAFFIC_PATT_UNIFORM_OTHERS) begin
        // Draw from NUM_NODES-1 values and skip over our own ID
        rnum = $urandom_range('d0, NUM_NODES-'d2);
        if (rnum < NODE_ID)
          gen_dst_sid = rnum[15:0];
        else
          gen_dst_sid = rnum[15:0] + 16'd1;
      end else if (traffic_patt == TRAFFIC_PATT_SEQUENTIAL) begin
        gen_dst_sid = (last_sid + 16'd1) % NUM_NODES;
      end else if (traffic_patt == TRAFFIC_PATT_NEIGHBOR) begin
        gen_dst_sid = (NODE_ID + 16'd1) % NUM_NODES;
      end else if (traffic_patt == TRAFFIC_PATT_LOOPBACK) begin
        gen_dst_sid = NODE_ID;
      end else if (traffic_patt == TRAFFIC_PATT_BIT_COMPLEMENT) begin
        gen_dst_sid = (NUM_NODES - NODE_ID - 1) % NUM_NODES;
      end else if (traffic_patt == TRAFFIC_PATT_RANDOM_PERM) begin
        //TODO: Implement me
        gen_dst_sid = 0;
      end else begin
        gen_dst_sid = 'd0;
      end
    end
  endfunction

  // Generation loop. Push to m_chdr infinitely fast
  initial begin: gen_blk
    // Generate infinitely
    $srandom(NODE_ID + NUM_NODES);
    m_chdr.reset();
    while (1) begin
      // A generation session begins on the posedge of start_stb
      while (~start_stb) @(posedge clk);
      curr_pkt_num = 'd0;
      m_chdr.reset();
      num_samps_xferd = 'd0;
      start_time = current_time;
      running = 1;
      while (curr_pkt_num < num_pkts_to_send) begin
        header = '{
          pkt_type:DATA, has_time:1, eob:0,
          seqnum:curr_pkt_num[11:0], length:(lines_per_pkt*8),
          src_sid:NODE_ID, dst_sid:gen_dst_sid(traffic_patt, last_gen_sid),
          timestamp:0   //TS attached later
        };
        last_gen_sid = header.dst_sid;
        curr_pkt_num = curr_pkt_num + 'd1;
        // Payload is a ramp starting at 0 with step 1, two lines shorter
        // than lines_per_pkt
        m_chdr.push_ramp_pkt(lines_per_pkt-2, 'h0, 'h1, header);
        num_samps_xferd = num_samps_xferd + lines_per_pkt;
      end
      running = 0;
      stop_time = current_time;
    end
  end

  // Capture packets in a really short FIFO (for backpressure)
  axi_fifo #(
    .WIDTH(WIDTH+1), .SIZE(MTU + 1)
  ) fifo_i (
    .clk      (clk),
    .reset    (rst),
    .clear    (1'b0),
    .i_tdata  ({m_chdr.axis.tlast, m_chdr.axis.tdata}),
    .i_tvalid (m_chdr.axis.tvalid),
    .i_tready (m_chdr.axis.tready),
    .o_tdata  ({post_fifo.tlast, post_fifo.tdata}),
    .o_tvalid (post_fifo.tvalid),
    .o_tready (post_fifo.tready),
    .space    (),
    .occupied ()
  );

  // Attach timestamp after the packet leaves the FIFO after
  // throttling.

  // Tracks which line of the packet is crossing the throttle point
  localparam [1:0] ST_HDR  = 2'd0;
  localparam [1:0] ST_TS   = 2'd1;
  localparam [1:0] ST_BODY = 2'd2;

  reg [1:0] pkt_state = ST_HDR;
  always_ff @(posedge clk) begin
    if (rst) begin
      pkt_state <= ST_HDR;
    end else if (pre_gate.tvalid & pre_gate.tready) begin
      case (pkt_state)
        ST_HDR:
          // Header bit 61 selects whether a timestamp line follows
          if (~pre_gate.tlast)
            pkt_state <= pre_gate.tdata[61] ? ST_TS : ST_BODY;
        ST_TS:
          pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY;
        ST_BODY:
          pkt_state <= pre_gate.tlast ? ST_HDR : ST_BODY;
        default:
          pkt_state <= ST_HDR;
      endcase
    end
  end

  // Enforce injection rate by pulling from FIFO with a certain time probability
  // The draw is uniform over 0..99 and the bus is blocked when it is
  // >= injection_rate, so a line may pass on injection_rate out of every
  // 100 cycles on average (0 blocks always, 100 never blocks).
  always_ff @(posedge clk) begin
    throttle <= ($urandom_range(32'd99, 32'd0) >= {24'h0, injection_rate});
  end

  // Insert timestamp + throttle logic
  assign pre_gate.tdata   = (pkt_state == ST_TS) ? current_time : post_fifo.tdata;
  assign pre_gate.tlast   = post_fifo.tlast;
  assign pre_gate.tvalid  = post_fifo.tvalid & ~throttle;
  assign post_fifo.tready = pre_gate.tready  & ~throttle;

  // Gate the packet to smooth out throttle-related noise.
  // This also serves as a buffer for the packet in case things are backed up
  axi_packet_gate #(
    .WIDTH(WIDTH), .SIZE(MTU + 4), .USE_AS_BUFF(1)
  ) pkt_gate_i (
    .clk      (clk),
    .reset    (rst),
    .clear    (1'b0),
    .i_tdata  (pre_gate.tdata),
    .i_tlast  (pre_gate.tlast),
    .i_terror (1'b0),
    .i_tvalid (pre_gate.tvalid),
    .i_tready (pre_gate.tready),
    .o_tdata  (m_axis_tdata),
    .o_tlast  (m_axis_tlast),
    .o_tvalid (m_axis_tvalid),
    .o_tready (m_axis_tready)
  );

endmodule
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: crossbar_tb
// Description:
//   Generic, parameterized testbench for the crossbar/router
//   implementations. Simulation only.
//   One traffic source and one traffic sink are attached to every router
//   port. The router selected by ROUTER_IMPL is instantiated between them
//   ("FIFO", "axi_crossbar", "chdr_crossbar_nxn"; any other value selects
//   axis_ctrl_crossbar_nxn, in TORUS topology for "axis_ctrl_2d_torus" and
//   MESH otherwise).
//   For each traffic pattern the injection rate is swept from
//   TEST_MIN_INJ_RATE to TEST_MAX_INJ_RATE. Every run checks that no routing
//   or data errors occurred and that all sent packets were received.
//   Summary CSV files are written when TEST_GEN_LL_FILES is 1.


`timescale 1ns/1ps
`define NS_PER_TICK 1
`define NUM_TEST_CASES 7

`include "sim_clks_rsts.vh"
`include "sim_exec_report.vh"
`include "sim_set_rb_lib.svh"
`include "sim_axis_lib.svh"

`define SIM_TIMEOUT_US 1000000 // Default: 1s

module crossbar_tb #(
  parameter TEST_NAME           = "crossbar_tb",
  // Router parameters
  parameter ROUTER_IMPL         = "axi_crossbar", // Router implementation
  parameter ROUTER_PORTS        = 10,             // # Router ports
  parameter ROUTER_DWIDTH       = 64,             // Router datapath width
  parameter MTU_LOG2            = 7,              // log2 of max packet size for router
  parameter NUM_MASTERS         = ROUTER_PORTS,   // Number of data generators in test
  // Test parameters
  parameter TEST_MAX_PACKETS    = 50,             // How many packets to stream per test case?
  parameter TEST_LPP            = 50,             // Lines per packet
  parameter TEST_MIN_INJ_RATE   = 60,             // Minimum injection rate to test
  parameter TEST_MAX_INJ_RATE   = 100,            // Maximum injection rate to test
  parameter TEST_INJ_RATE_INCR  = 10,             // Injection rate increment
  parameter TEST_GEN_LL_FILES   = 0               // Generate files to produce load-latency graphs?

)(
  /* no IO */
);
  // Test-reporting setup; macro comes from the included sim headers
  `TEST_BENCH_INIT(TEST_NAME,`NUM_TEST_CASES,`NS_PER_TICK)

  //----------------------------------------------------
  // General test setup
  //----------------------------------------------------

  // Clocks and reset
  // NOTE(review): macro arguments presumably mean period (ns) / duty cycle
  // and reset polarity / length -- confirm against sim_clks_rsts.vh
  `DEFINE_CLK(clk, 5.000, 50)
  `DEFINE_RESET(rst, 0, 10)

  // Timekeeper (cycle counter)
  // Counts clk edges from the end of reset; used as the global timebase for
  // packet timestamps and latency measurement.
  logic [63:0] timestamp;
  initial begin : timekeeper_blk
    while (rst) @(posedge clk);
    timestamp = 'd0;
    while (~rst) begin
      @(posedge clk);
      timestamp = timestamp + 'd1;
    end
  end

  //----------------------------------------------------
  // Instantiate traffic generators, checkers, buses
  //----------------------------------------------------
  // Output directory for CSV files; `WORKING_DIR is defined outside this file
  localparam FILE_PATH = {`WORKING_DIR, "/data/", ROUTER_IMPL};

  // Data buses
  axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) src2rtr_axis (.clk(clk));
  axis_t #(.DWIDTH(ROUTER_DWIDTH), .NUM_STREAMS(ROUTER_PORTS)) rtr2snk_axis (.clk(clk));

  // Control buses
  settings_bus_master #(.SR_AWIDTH(16), .SR_DWIDTH(32)) rtr_sb (.clk(clk));
  wire rtr_sb_ack;   // Only driven by the chdr_crossbar_nxn DUT

  // Test vector source and sink instantiation
  // Settings shared by all sources and sinks; written by sim_dataflow
  logic [7:0]  set_injection_rate;
  logic [15:0] set_lines_per_pkt;
  logic [7:0]  set_traffic_patt;
  logic [31:0] set_num_pkts_to_send;
  logic        snk_start_stb = 0;
  logic        src_start_stb = 0;

  // Per-port status and metrics reported by the sources and sinks
  wire [63:0]             session_duration   [0:ROUTER_PORTS-1];
  wire [ROUTER_PORTS-1:0] src_active;
  wire [31:0]             src_xfer_count     [0:ROUTER_PORTS-1];
  wire [31:0]             src_pkt_count      [0:ROUTER_PORTS-1];
  wire [ROUTER_PORTS-1:0] snk_active;
  wire [31:0]             snk_xfer_count     [0:ROUTER_PORTS-1];
  wire [31:0]             snk_pkt_count      [0:ROUTER_PORTS-1];
  wire [31:0]             snk_data_err_count [0:ROUTER_PORTS-1];
  wire [31:0]             snk_route_err_count[0:ROUTER_PORTS-1];

  // Deadlock indication; only driven by the axis_ctrl_crossbar_nxn DUT.
  // Rising/falling edge strobes are derived for console messages.
  wire deadlock_detected;
  reg  deadlock_detected_del = 1'b0;
  always @(posedge clk) deadlock_detected_del <= deadlock_detected;
  wire deadlock_re = (deadlock_detected & ~deadlock_detected_del);
  wire deadlock_fe = (~deadlock_detected & deadlock_detected_del);

  genvar i;
  generate for (i = 0; i < ROUTER_PORTS; i=i+1) begin: src_snk_blk
    // A source exists on every port but only the first NUM_MASTERS of them
    // ever receive a start strobe.
    chdr_traffic_source_sim #(
      .WIDTH           (ROUTER_DWIDTH),
      .MTU             (MTU_LOG2),
      .NODE_ID         (i),
      .NUM_NODES       (ROUTER_PORTS)
    ) traffic_src (
      .clk             (clk),
      .rst             (rst),
      .current_time    (timestamp),
      .start_stb       (src_start_stb & (i < NUM_MASTERS)),
      .injection_rate  (set_injection_rate),
      .lines_per_pkt   (set_lines_per_pkt),
      .traffic_patt    (set_traffic_patt),
      .num_pkts_to_send(set_num_pkts_to_send),
      .m_axis_tdata    (src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]),
      .m_axis_tlast    (src2rtr_axis.tlast[i]),
      .m_axis_tvalid   (src2rtr_axis.tvalid[i]),
      .m_axis_tready   (src2rtr_axis.tready[i]),
      .session_active  (src_active[i]),
      .session_duration(session_duration[i]),
      .xfer_count      (src_xfer_count[i]),
      .pkt_count       (src_pkt_count[i])
    );

    // An empty FILE_PATH disables per-packet logging in the sink
    chdr_traffic_sink_sim #(
      .WIDTH           (ROUTER_DWIDTH),
      .MTU             (MTU_LOG2),
      .NODE_ID         (i),
      .NUM_NODES       (ROUTER_PORTS),
      .FILE_PATH       (TEST_GEN_LL_FILES==1 ? FILE_PATH : "")
    ) traffic_sink (
      .clk             (clk),
      .rst             (rst),
      .current_time    (timestamp),
      .start_stb       (snk_start_stb),
      .injection_rate  (set_injection_rate),
      .lines_per_pkt   (set_lines_per_pkt),
      .traffic_patt    (set_traffic_patt),
      .s_axis_tdata    (rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]),
      .s_axis_tlast    (rtr2snk_axis.tlast[i]),
      .s_axis_tvalid   (rtr2snk_axis.tvalid[i]),
      .s_axis_tready   (rtr2snk_axis.tready[i]),
      .session_active  (snk_active[i]),
      .xfer_count      (snk_xfer_count[i]),
      .pkt_count       (snk_pkt_count[i]),
      .data_err_count  (snk_data_err_count[i]),
      .route_err_count (snk_route_err_count[i])
    );
  end endgenerate

  //----------------------------------------------------
  // Instantiate DUT
  //----------------------------------------------------
  generate if (ROUTER_IMPL == "FIFO") begin
    // Reference case: port i is wired straight to port i through a FIFO,
    // no routing at all.
    for (i = 0; i < ROUTER_PORTS; i=i+1) begin
      axi_fifo #(
        .WIDTH(ROUTER_DWIDTH+1), .SIZE(0)
      ) fifo_i (
        .clk      (clk),
        .reset    (rst),
        .clear    (1'b0),
        .i_tdata  ({src2rtr_axis.tlast[i], src2rtr_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}),
        .i_tvalid (src2rtr_axis.tvalid[i]),
        .i_tready (src2rtr_axis.tready[i]),
        .o_tdata  ({rtr2snk_axis.tlast[i], rtr2snk_axis.tdata[((i+1)*ROUTER_DWIDTH)-1:i*ROUTER_DWIDTH]}),
        .o_tvalid (rtr2snk_axis.tvalid[i]),
        .o_tready (rtr2snk_axis.tready[i]),
        .space    (),
        .occupied ()
      );
    end
  end else if (ROUTER_IMPL == "axi_crossbar") begin
    // Routing table is programmed over the settings bus in "Set up crossbar"
    axi_crossbar #(
      .BASE         (0),
      .FIFO_WIDTH   (ROUTER_DWIDTH),
      .DST_WIDTH    (16),
      .NUM_INPUTS   (ROUTER_PORTS),
      .NUM_OUTPUTS  (ROUTER_PORTS)
    ) router_dut_i (
      // General
      .clk          (clk),
      .reset        (rst),
      .clear        (1'b0),
      .local_addr   (8'd0),
      // Inputs
      .i_tdata      (src2rtr_axis.tdata),
      .i_tlast      (src2rtr_axis.tlast),
      .i_tvalid     (src2rtr_axis.tvalid),
      .i_tready     (src2rtr_axis.tready),
      .pkt_present  (src2rtr_axis.tvalid),
      // Output
      .o_tdata      (rtr2snk_axis.tdata),
      .o_tlast      (rtr2snk_axis.tlast),
      .o_tvalid     (rtr2snk_axis.tvalid),
      .o_tready     (rtr2snk_axis.tready),
      // Setting Bus
      .set_stb      (rtr_sb.settings_bus.set_stb),
      .set_addr     (rtr_sb.settings_bus.set_addr),
      .set_data     (rtr_sb.settings_bus.set_data),
      // Readback bus
      .rb_rd_stb    (1'b0),
      .rb_addr      ({(2*$clog2(ROUTER_PORTS)){1'b0}}),
      .rb_data      ()
    );
  end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin
    // Routing table is programmed through the external config port, which
    // is driven from the settings bus master and acknowledges each write.
    chdr_crossbar_nxn #(
      .CHDR_W         (ROUTER_DWIDTH),
      .NPORTS         (ROUTER_PORTS),
      .DEFAULT_PORT   (0),
      .MTU            (MTU_LOG2),
      .ROUTE_TBL_SIZE (6),
      .MUX_ALLOC      ("ROUND-ROBIN"),
      .OPTIMIZE       ("AREA"),
      .NPORTS_MGMT    (0),
      .EXT_RTCFG_PORT (1)
    ) router_dut_i (
      // General
      .clk            (clk),
      .reset          (rst),
      // Inputs
      .s_axis_tdata   (src2rtr_axis.tdata),
      .s_axis_tlast   (src2rtr_axis.tlast),
      .s_axis_tvalid  (src2rtr_axis.tvalid),
      .s_axis_tready  (src2rtr_axis.tready),
      // Output
      .m_axis_tdata   (rtr2snk_axis.tdata),
      .m_axis_tlast   (rtr2snk_axis.tlast),
      .m_axis_tvalid  (rtr2snk_axis.tvalid),
      .m_axis_tready  (rtr2snk_axis.tready),
      // External router config
      .ext_rtcfg_stb  (rtr_sb.settings_bus.set_stb),
      .ext_rtcfg_addr (rtr_sb.settings_bus.set_addr),
      .ext_rtcfg_data (rtr_sb.settings_bus.set_data),
      .ext_rtcfg_ack  (rtr_sb_ack)
    );
  end else begin
    // Any other ROUTER_IMPL value lands here; no routing setup is written
    // for this DUT in the "Set up crossbar" test case.
    axis_ctrl_crossbar_nxn #(
      .WIDTH            (ROUTER_DWIDTH),
      .NPORTS           (ROUTER_PORTS),
      .TOPOLOGY         (ROUTER_IMPL == "axis_ctrl_2d_torus" ? "TORUS" : "MESH"),
      .INGRESS_BUFF_SIZE(MTU_LOG2),
      .ROUTER_BUFF_SIZE (MTU_LOG2),
      .ROUTING_ALLOC    ("WORMHOLE"),
      .SWITCH_ALLOC     ("PRIO")
    ) router_dut_i (
      // General
      .clk              (clk),
      .reset            (rst),
      // Inputs
      .s_axis_tdata     (src2rtr_axis.tdata),
      .s_axis_tlast     (src2rtr_axis.tlast),
      .s_axis_tvalid    (src2rtr_axis.tvalid),
      .s_axis_tready    (src2rtr_axis.tready),
      // Output
      .m_axis_tdata     (rtr2snk_axis.tdata),
      .m_axis_tlast     (rtr2snk_axis.tlast),
      .m_axis_tvalid    (rtr2snk_axis.tvalid),
      .m_axis_tready    (rtr2snk_axis.tready),
      // Deadlock detection
      .deadlock_detected(deadlock_detected)
    );
  end endgenerate

  //----------------------------------------------------
  // Test routine. Runs tests and writes metrics to file
  //----------------------------------------------------

  // Constants
  // Traffic pattern codes (ASCII of the letter in the trailing comment);
  // same values as in chdr_traffic_source_sim.
  localparam [7:0] TRAFFIC_PATT_LOOPBACK       = 8'd76;  //L
  localparam [7:0] TRAFFIC_PATT_NEIGHBOR       = 8'd78;  //N
  localparam [7:0] TRAFFIC_PATT_BIT_COMPLEMENT = 8'd67;  //C
  localparam [7:0] TRAFFIC_PATT_SEQUENTIAL     = 8'd83;  //S
  localparam [7:0] TRAFFIC_PATT_UNIFORM        = 8'd85;  //U
  localparam [7:0] TRAFFIC_PATT_UNIFORM_OTHERS = 8'd79;  //O
  localparam [7:0] TRAFFIC_PATT_RANDOM_PERM    = 8'd82;  //R

  string filename;
  integer node;
  integer session = 0;
  integer handle = 0;
  logic [63:0] start_time;
  integer total_pkts_recvd = 0, total_pkts_sent = 0;

  // Runs one traffic session with the given settings and checks the result.
  //   injection_rate   : offered load in percent
  //   traffic_patt     : one of the TRAFFIC_PATT_* codes
  //   lines_per_pkt    : packet length in lines
  //   num_pkts_to_send : packets generated by each active source
  // Prints per-node statistics, optionally writes them to a summary CSV,
  // and raises errors on routing/data errors or a TX/RX packet mismatch.
  task sim_dataflow;
    input [7:0]   injection_rate;
    input [7:0]   traffic_patt;
    input [15:0]  lines_per_pkt;
    input [31:0]  num_pkts_to_send;
  begin
    session = session + 1;
    $display("--------------- New Simulation ---------------");
    $display("- Module = %s", ROUTER_IMPL);
    $display("- Nodes = %00d", ROUTER_PORTS);
    $display("- Injection Rate = %00d%%", injection_rate);
    $display("- Traffic Pattern = %c", traffic_patt);
    $display("- Packet Size = %00d words (%00d bits)", lines_per_pkt, ROUTER_DWIDTH);
    $display("- Max Packets = %00d", num_pkts_to_send);
    // Configure settings
    @(posedge clk);
    set_injection_rate = injection_rate;
    set_lines_per_pkt = lines_per_pkt;
    set_traffic_patt = traffic_patt;
    set_num_pkts_to_send = num_pkts_to_send;
    @(posedge clk);
    // Start the sink then the source
    $display("Data flow starting...");
    snk_start_stb = 1;
    src_start_stb = 1;
    @(posedge clk);
    src_start_stb = 0;
    snk_start_stb = 0;
    @(posedge clk);
    start_time = timestamp;
    // Wait for source blocks to finish generating
    $display("Waiting for packets to transmit... (may take a while)");
    while (|src_active) begin
      @(posedge clk);
      if (deadlock_re) $display("WARNING: Deadlock detected");
      if (deadlock_fe) $display("Recovered from deadlock");
    end
    // Wait for sink blocks to finish consuming
    $display("All packets transmitted. Waiting to flush...");
    while (|snk_active) @(posedge clk);
    // If router deadlocks then wait for it to recover
    if (deadlock_detected) begin
      $display("Waiting for deadlock recovery to finish...");
      while (deadlock_detected) @(posedge clk);
    end
    repeat(set_lines_per_pkt) @(posedge clk);
    // Record summary to file and print to console
    $sformat(filename, "%s/info_inj%03d_lpp%05d_traffic%c_sess%04d.csv",
      FILE_PATH, injection_rate, lines_per_pkt, traffic_patt, session);
    if (TEST_GEN_LL_FILES == 1) begin
      handle = $fopen(filename, "w");
      if (handle == 0) begin
        $error("Could not open file: %s", filename);
        $finish();
      end
    end
    if (handle != 0) $fdisplay(handle, "Impl,Node,TxPkts,RxPkts,Duration,ErrRoute,ErrData");
    total_pkts_sent = 0;
    total_pkts_recvd = 0;
    for (node = 0; node < ROUTER_PORTS; node=node+1) begin
      // The printed "Inj Rate" is the measured one: lines sent * 100 / session duration
      $display("- Node #%03d: TX = %5d pkts, RX = %5d pkts, Inj Rate = %3d%%. Errs = %5d route, %5d data",
        node,src_pkt_count[node], snk_pkt_count[node], ((src_xfer_count[node]*100)/session_duration[node]),
        snk_route_err_count[node], snk_data_err_count[node]);
      if (handle != 0) $fdisplay(handle, "%s,%00d,%00d,%00d,%00d,%00d,%00d", ROUTER_IMPL,
        node,src_pkt_count[node], snk_pkt_count[node], session_duration[node],
        snk_route_err_count[node], snk_data_err_count[node]);
      total_pkts_sent = total_pkts_sent + src_pkt_count[node];
      total_pkts_recvd = total_pkts_recvd + snk_pkt_count[node];
      `ASSERT_ERROR(snk_route_err_count[node] == 0, "Routing errors. Received packets destined to other nodes");
      `ASSERT_ERROR(snk_data_err_count[node] == 0, "Integrity errors. Received corrupted packets");
    end
    $display("Finished. Elapsed = %00d cycles, TX = %00d pkts, RX = %00d pkts",
      (timestamp - start_time), total_pkts_sent, total_pkts_recvd);
    `ASSERT_ERROR(total_pkts_recvd == total_pkts_sent, "Total # TX packets did not match the total # RX packets");
    if (handle != 0) $fclose(handle);
    $display("----------------------------------------------");
  end
  endtask

  //----------------------------------------------------
  // Main test loop
  //----------------------------------------------------

  // Variable copies of the test parameters, passed to sim_dataflow
  logic [31:0] MAX_PACKETS = TEST_MAX_PACKETS;
  logic [15:0] LPP = TEST_LPP;
  integer MIN_INJ_RATE = TEST_MIN_INJ_RATE;
  integer MAX_INJ_RATE = TEST_MAX_INJ_RATE;
  integer INJ_RATE_INCR = TEST_INJ_RATE_INCR;

  integer inj_rate = 0;
  initial begin : tb_main
    src_start_stb = 0;
    snk_start_stb = 0;
    rtr_sb.reset();
    while (rst) @(posedge clk);

    repeat (10) @(posedge clk);

    // Program an identity mapping (value = node number) for each node.
    // The two table-based routers use different addresses; the other DUTs
    // need no setup.
    `TEST_CASE_START("Set up crossbar");
    for (node = 0; node < ROUTER_PORTS; node=node+1) begin
      if (ROUTER_IMPL == "axi_crossbar") begin
        rtr_sb.write(16'd256 + node[15:0], {16'h0, node[15:0]});
      end else if (ROUTER_IMPL == "chdr_crossbar_nxn") begin
        rtr_sb.write(node[15:0], {16'h0, node[15:0]});
        while (~rtr_sb_ack) @(posedge clk);
      end
    end
    `TEST_CASE_DONE(1)

    // Each test case below sweeps the injection rate for one traffic pattern

    `TEST_CASE_START("Simulate LOOPBACK Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_LOOPBACK, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate SEQUENTIAL Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_SEQUENTIAL, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate UNIFORM Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate UNIFORM_OTHERS Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_UNIFORM_OTHERS, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate BIT_COMPLEMENT Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_BIT_COMPLEMENT, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_CASE_START("Simulate NEIGHBOR Traffic Pattern");
    for (inj_rate = MIN_INJ_RATE; inj_rate <= MAX_INJ_RATE; inj_rate = inj_rate + INJ_RATE_INCR) begin
      sim_dataflow(inj_rate, TRAFFIC_PATT_NEIGHBOR, LPP, MAX_PACKETS);
    end
    `TEST_CASE_DONE(1)

    `TEST_BENCH_DONE
  end // initial begin

endmodule
#
# SPDX-License-Identifier: LGPL-3.0-or-later
#
# Description
#   Parses the output files generated by crossbar_tb and outputs
#   a load-latency graph and an expected-actual throughput graph

import os, sys
import argparse
import time
import glob
import csv
import re
import numpy as np

def get_options():
    """Parse the command line.

    Returns the argparse namespace; ``datadir`` is the directory holding
    the testbench CSV files and defaults to the current directory.
    """
    parser = argparse.ArgumentParser(description='Generate Load Latency Graphs')
    # nargs='?' makes the positional optional so that the default applies
    parser.add_argument('datadir', type=str, nargs='?', default='.', help='Location of packet capture files generated by testbench')
    return parser.parse_args()

# Maps the single-letter code used in the CSV file names to the pattern name
TRAFFIC_PATTERNS = {'U':'UNIFORM', 'O':'UNIFORM_OTHERS', 'N':'NEIGHBOR', 'L':'LOOPBACK', 'S':'SEQUENTIAL', 'C':'BIT_COMPLEMENT', 'R':'RANDOM_PERM'}

class InfoFile():
    """Per-session summary file (info_*.csv) written by crossbar_tb.

    The test settings (inj_rate, lpp, traffic_patt, session) are taken from
    the file name. The per-node rows are summed into tx_pkts, rx_pkts,
    duration and errs; nodes is the number of rows and impl the value of
    the Impl column. real_inj_rate is the measured injection rate in
    percent, 100 * tx_pkts * lpp / duration, or 0.0 if duration is zero.

    Raises ValueError if the file name or the CSV header is not in the
    expected format.
    """
    def __init__(self, filename):
        # Extract test info from filename
        # Match on the basename so any path separator (or none) is accepted
        m = re.match(r"info_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", os.path.basename(filename))
        if m is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        self.inj_rate = int(m.group(1))
        self.lpp = int(m.group(2))
        self.traffic_patt = TRAFFIC_PATTERNS[m.group(3)]
        self.session = int(m.group(4))

        self.impl = ''
        self.tx_pkts = 0
        self.rx_pkts = 0
        self.duration = 0
        self.errs = 0
        self.nodes = 0
        with open(filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            isheader = True
            for row in reader:
                if isheader:
                    isheader = False
                    if row != ['Impl', 'Node', 'TxPkts', 'RxPkts', 'Duration', 'ErrRoute', 'ErrData']:
                        raise ValueError('Incorrect header: %s'%(filename))
                else:
                    self.impl = row[0]
                    self.tx_pkts = self.tx_pkts + int(row[2])
                    # Accumulate received packets into their own counter
                    self.rx_pkts = self.rx_pkts + int(row[3])
                    self.duration = self.duration + int(row[4])
                    self.errs = self.errs + int(row[5]) + int(row[6])
                    self.nodes = self.nodes + 1
        # A file without data rows (or with zero durations) has no measurable rate
        if self.duration != 0:
            self.real_inj_rate = (100.0 * self.tx_pkts * self.lpp) / self.duration
        else:
            self.real_inj_rate = 0.0

class PktFile():
    """Per-node packet log (pkts_*.csv) written by chdr_traffic_sink_sim.

    The test settings (node, inj_rate, lpp, traffic_patt, session) are taken
    from the file name; latencies is the list of the Latency column values,
    one per received packet.

    Raises ValueError if the file name or the CSV header is not in the
    expected format.
    """
    def __init__(self, filename):
        # Extract test info from filename
        # Match on the basename so any path separator (or none) is accepted
        m = re.match(r"pkts_node([0-9]+)_inj([0-9]+)_lpp([0-9]+)_traffic(.)_sess([0-9]+)\.csv", os.path.basename(filename))
        if m is None:
            raise ValueError('Incorrect filename format: %s'%(filename))
        self.node = int(m.group(1))
        self.inj_rate = int(m.group(2))
        self.lpp = int(m.group(3))
        self.traffic_patt = TRAFFIC_PATTERNS[m.group(4)]
        self.session = int(m.group(5))

        self.latencies = []
        with open(filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            isheader = True
            for row in reader:
                if isheader:
                    isheader = False
                    if row != ['Src', 'Dst', 'Seqno', 'Error', 'Latency']:
                        raise ValueError('Incorrect header: %s'%(filename))
                else:
                    self.latencies.append(int(row[4]))


def main():
    """Read all info/pkts CSV files in datadir and write the PNG graphs there."""
    # Plotting is only needed when run as a script; importing it here keeps
    # the parser classes importable without matplotlib.
    import matplotlib
    #matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    options = get_options()

    if (not os.path.isdir(options.datadir)):
        print('ERROR: Data director %s does not exist'%(options.datadir))
        sys.exit(1)

    # Summary files, keyed by (lpp, traffic pattern, offered injection rate)
    info_db = dict()
    info_files = glob.glob(os.path.join(options.datadir, 'info*.csv'))
    router_impl = ''
    lines_per_pkt = 0
    for ifile in info_files:
        print('INFO: Reading %s...'%(ifile))
        tmp = InfoFile(ifile)
        router_impl = tmp.impl  # Assume that all files have the same impl
        lines_per_pkt = tmp.lpp # Assume that all files have the same LPP
        info_db[(tmp.lpp, tmp.traffic_patt, tmp.inj_rate)] = tmp

    # Latencies of all nodes pooled per (lpp, traffic pattern) and injection rate
    pkt_db = dict()
    pkts_files = glob.glob(os.path.join(options.datadir, 'pkts*.csv'))
    for pfile in pkts_files:
        print('INFO: Reading %s...'%(pfile))
        tmp = PktFile(pfile)
        config_key = (tmp.lpp, tmp.traffic_patt)
        if config_key not in pkt_db:
            pkt_db[config_key] = dict()
        if tmp.inj_rate not in pkt_db[config_key]:
            pkt_db[config_key][tmp.inj_rate] = []
        pkt_db[config_key][tmp.inj_rate].extend(tmp.latencies)

    # Write load-latency plots to file
    actual_inj_rate_db = dict()
    for config in sorted(pkt_db):
        (lpp, traffic_patt) = config
        ll_file = 'load-latency_%s_traffic-%s_lpp-%d.png'%(router_impl, traffic_patt, lpp)
        print('INFO: Writing file ' + ll_file + '...')
        percentile = [0, 25, 50, 75, 90, 95, 99, 99.9, 100]
        plt.figure()
        plt.title('Load Latency Graph for %s\n(Traffic: %s, LPP: %d)'%(router_impl, traffic_patt, lpp))
        for p in percentile:
            # One curve per percentile: measured load on x, latency on y
            plot_data = dict()
            for inj_rate in pkt_db[config]:
                real_inj_rate = info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate
                plot_data[real_inj_rate] = np.percentile(pkt_db[config][inj_rate], p)
            latencies = []
            rates = []
            for inj_rate in sorted(plot_data):
                rates.append(inj_rate)
                latencies.append(plot_data[inj_rate])
            plt.plot(rates, latencies, label='$P_{%.1f}$'%(p))
        plt.xlabel('Load (%)')
        plt.xticks(range(0, 110, 10))
        plt.ylabel('Latency (cycles)')
        plt.grid(True)
        plt.legend()
        plt.savefig(os.path.join(options.datadir, ll_file), dpi=120)
        # Generate actual inj_rate graph
        real_inj_rates = []
        for inj_rate in sorted(pkt_db[config]):
            real_inj_rates.append(info_db[(lpp, traffic_patt, inj_rate)].real_inj_rate)
        actual_inj_rate_db[config] = (sorted(pkt_db[config]), real_inj_rates)

    # Write offered vs actual injection rate plots to file
    injrate_file = 'injection-rate_%s_lpp-%d.png'%(router_impl, lines_per_pkt)
    print('INFO: Writing file ' + injrate_file + '...')
    plt.figure()
    plt.title('Max Injection Rate Graph for %s'%(router_impl))
    for config in actual_inj_rate_db:
        (x, y) = actual_inj_rate_db[config]
        plt.plot(x, y, label=str(config))
    plt.xlabel('Offered Injection Rate (%)')
    plt.xticks(range(0, 110, 10))
    plt.ylabel('Accepted Injection Rate (%)')
    plt.yticks(range(0, 110, 10))
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(options.datadir, injrate_file), dpi=120)


########################################################################
# main
########################################################################
if __name__=='__main__':
    main()
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py new file mode 100755 index 000000000..8e546fef9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/crossbar_tb/run_sim_multi.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# +# Description +# Run the crossbar testbench (crossbar_tb) for varios parameter +# configurations and generates load-latency graphs for each run. + +import argparse +import math +import os, sys +import shutil +import glob +import subprocess + +g_tb_top_template = """ +`timescale 1ns/1ps +module crossbar_tb_auto(); + crossbar_tb #( + .TEST_NAME ("crossbar_tb_auto"), + .ROUTER_IMPL ("{rtr_impl}"), + .ROUTER_PORTS ({rtr_ports}), + .ROUTER_DWIDTH ({rtr_width}), + .MTU_LOG2 ({rtr_mtu}), + .NUM_MASTERS ({rtr_sources}), + .TEST_MAX_PACKETS ({tst_maxpkts}), + .TEST_LPP ({tst_lpp}), + .TEST_MIN_INJ_RATE ({tst_injrate_min}), + .TEST_MAX_INJ_RATE ({tst_injrate_max}), + .TEST_INJ_RATE_INCR (10), + .TEST_GEN_LL_FILES (1) + ) impl ( + /* no IO */ + ); +endmodule +""" + +g_test_params = { + 'data': {'rtr_width':64, 'rtr_mtu':7, 'tst_maxpkts':100, 'tst_lpp':100, 'tst_injrate_min':30, 'tst_injrate_max':100}, + 'ctrl': {'rtr_width':64, 'rtr_mtu':5, 'tst_maxpkts':100, 'tst_lpp':20, 'tst_injrate_min':10, 'tst_injrate_max':50}, +} + +g_xb_types = { + 'chdr_crossbar_nxn':'data', 'axi_crossbar':'data', + 'axis_ctrl_2d_torus':'ctrl', 'axis_ctrl_2d_mesh':'ctrl' +} + +def get_options(): + parser = argparse.ArgumentParser(description='Run correctness sim and generate load-latency plots') + parser.add_argument('--impl', type=str, default='chdr_crossbar_nxn', help='Implementation (CSV) [%s]'%(','.join(g_xb_types.keys()))) + parser.add_argument('--ports', type=str, default='16', help='Number of ports (CSV)') + parser.add_argument('--sources', type=str, 
default='16', help='Number of active data sources (masters)') + return parser.parse_args() + +def launch_run(impl, ports, sources): + run_name = '%s_ports%d_srcs%d'%(impl, ports, sources) + # Prepare a transform map to autogenerate a TB file + transform = {'rtr_impl':impl, 'rtr_ports':ports, 'rtr_sources':sources} + for k,v in g_test_params[g_xb_types[impl]].items(): + transform[k] = v + # Create crossbar_tb_auto.sv with specified parameters + with open('crossbar_tb_auto.sv', 'w') as out_file: + out_file.write(g_tb_top_template.format(**transform)) + # Create data directory for the simulation + data_dir = os.path.join('data', impl) + export_dir = os.path.join('data', run_name) + try: + os.makedirs('data') + except FileExistsError: + pass + os.makedirs(data_dir) + os.makedirs(export_dir) + # Run "make xsim" + exitcode = subprocess.Popen('make xsim TB_TOP_MODULE=crossbar_tb_auto', shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "make xsim". Was setupenv.sh run?') + # Generate load-latency graphs + exitcode = subprocess.Popen('gen_load_latency_graph.py ' + data_dir, shell=True).wait() + if exitcode != 0: + raise RuntimeError('Error running "gen_load_latency_graph.py"') + # Copy files + os.rename('xsim.log', os.path.join(export_dir, 'xsim.log')) + for file in glob.glob(os.path.join(data_dir, '*.png')): + shutil.copy(file, export_dir) + # Cleanup outputs + subprocess.Popen('make cleanall', shell=True).wait() + try: + os.remove('crossbar_tb_auto.sv') + except FileNotFoundError: + pass + try: + shutil.rmtree(data_dir) + except OSError: + print('WARNING: Could not delete ' + data_dir) + +def main(): + args = get_options(); + for impl in args.impl.strip().split(','): + for ports in args.ports.strip().split(','): + for sources in args.sources.strip().split(','): + launch_run(impl, int(ports), min(int(ports), int(sources))) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py 
b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py
new file mode 100755
index 000000000..a2eaf71fb
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/gen_node_to_coord_mapping.py
@@ -0,0 +1,125 @@
#!/usr/bin/python3
#
# Copyright 2018 Ettus Research, a National Instruments Company
#
# SPDX-License-Identifier: LGPL-3.0-or-later
#

import argparse
import math
import sys
import datetime
import random

# Parse command line options
# ------------------------------------------------
def get_options():
    """Parse the command line; exits with a usage error if --dimsize is below 2."""
    parser = argparse.ArgumentParser(description='Generate a node to coordinate mapping file.')
    parser.add_argument('--pattern', type=str, default='xy', choices=['xy', 'yx', 'spiral', 'random'], help='Node distribution pattern')
    parser.add_argument('--dimsize', type=int, default=4, help='Maximum dimension size')
    parser.add_argument('--seed', type=int, default=None, help='Seed for random permutation generator')
    args = parser.parse_args()
    # Tables are generated for dimensions 2..dimsize; with nothing to generate
    # the lookup functions would begin with a dangling "else"
    if args.dimsize < 2:
        parser.error('--dimsize must be at least 2')
    return args

# Pattern Generators
# ------------------------------------------------
# Each generator returns a dict that maps a node number to its (x, y)
# coordinate in an N x N grid.
def gen_xy(N):
    """Number the nodes row by row (x varies fastest)."""
    nodes = dict()
    for y in range(N):
        for x in range(N):
            nodes[(y*N + x)] = (x, y)
    return nodes

def gen_yx(N):
    """Number the nodes column by column (y varies fastest)."""
    nodes = dict()
    for y in range(N):
        for x in range(N):
            nodes[(x*N + y)] = (x, y)
    return nodes

def gen_spiral(N):
    """Number the nodes along a spiral that starts near the middle of the grid."""
    nodes = dict()
    x = y = 0
    dx = 0
    dy = -1
    for i in range(N**2):
        if (-N/2 < x <= N/2) and (-N/2 < y <= N/2):
            # Shift the origin-centred walk so that coordinates start at 0
            nodes[i] = (x + int(math.ceil(N/2)) - 1, y + int(math.ceil(N/2)) - 1)
        if x == y or (x < 0 and x == -y) or (x > 0 and x == 1-y):
            # Reached a corner of the spiral: turn
            dx, dy = -dy, dx
        x, y = x+dx, y+dy
    return nodes

def gen_random(N):
    """Assign node numbers from a random permutation (seeded via random.seed)."""
    nodes = dict()
    rnodes = random.sample(range(N*N), N*N)
    for y in range(N):
        for x in range(N):
            nodes[rnodes[x*N + y]] = (x, y)
    return nodes

# Source Generators
# ------------------------------------------------
def layout_nodes(nodes):
    """Return one text line per grid row showing the node number at each (x, y)."""
    N = int(math.sqrt(len(nodes)))
    #inv_nodes = {v: k for k, v in nodes.iteritems()}
    coords = {v: k for k, v in nodes.items()}
    lines = []
    for y in range(N):
        line = ''
        for x in range(N):
            line += '%5d'%(coords[(x,y)])
        lines.append(line)
    return lines

def gen_vparams(nodes, N, pattern):
    """Return the Verilog source lines of the X/Y coordinate tables for one grid size.

    The tables are concatenations ordered from the highest node number down,
    which places node 0 in the least-significant bits.
    """
    src_lines = [ '\n// DIM_SIZE = %d, PATTERN = %s'%(N,pattern.upper()), '//------------------------------------' ]
    for l in layout_nodes(nodes):
        src_lines.append('// ' + l)
    bitw = math.ceil(math.log2(N))
    xvals = ','.join(['%d\'d%d'%(bitw,v[0]) for k, v in sorted(nodes.items(), reverse=True)])
    yvals = ','.join(['%d\'d%d'%(bitw,v[1]) for k, v in sorted(nodes.items(), reverse=True)])
    xpar = 'localparam [%d:0] XCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, xvals)
    ypar = 'localparam [%d:0] YCOORD_DIM_%03d = {%s};'%(bitw*N*N-1, N, yvals)
    src_lines.append(xpar)
    src_lines.append(ypar)
    src_lines.append('')
    return src_lines

def gen_lookup_func(dim, N):
    """Return the Verilog source lines of node_to_<dim>dst for DIM_SIZE 2..N."""
    src_lines = [ 'function [CLOG2_DIM_SIZE-1:0] node_to_%sdst;'%(dim), ' input [WIDTH-1:0] header;', 'begin']
    dim_sizes = range(2, N+1)
    for i in dim_sizes:
        node_bitw = math.ceil(math.log2(i*i))
        dim_bitw = math.ceil(math.log2(i))
        prefix = ' ' if (i == dim_sizes[0]) else ' else '
        src_lines.append(prefix + 'if (DIM_SIZE == %d)'%(i))
        src_lines.append(' node_to_%sdst = %sCOORD_DIM_%03d[%d*header[%d:0] +: %d];'%(dim,dim.upper(),i,dim_bitw,node_bitw-1,dim_bitw))
    src_lines.append(' else')
    src_lines.append(' node_to_%sdst = {CLOG2_DIM_SIZE{1\'d0}};'%(dim))
    src_lines.append('end endfunction\n\n')
    return src_lines

def gen_vheader(dimsize, mapgen, pattern, filename):
    """Write the Verilog header with tables and lookup functions for sizes 2..dimsize.

    dimsize  -- largest grid dimension to generate
    mapgen   -- one of the gen_* pattern generators
    pattern  -- pattern name, used in the generated comments
    filename -- output file path
    """
    with open(filename, 'w') as vhfile:
        vhfile.write('// Copyright %s Ettus Research, A National Instruments Company\n'%(datetime.datetime.now().year))
        vhfile.write('// SPDX-License-Identifier: LGPL-3.0-or-later\n')
        vhfile.write('//\n')
        vhfile.write('// Autogenerated file. Do not modify.\n')
        vhfile.write('// $ %s\n'%(' '.join(sys.argv[:])))
        vhfile.write('\nparameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround\n\n')
        for i in range(2, dimsize+1):
            nodes = mapgen(i)
            # Integer grid size, as in layout_nodes(); math.sqrt alone returns a float
            N = int(math.sqrt(len(nodes)))
            vhfile.write('\n'.join(gen_vparams(nodes, N, pattern)))
        vhfile.write('\n\n')
        vhfile.write('\n'.join(gen_lookup_func('x', dimsize)))
        vhfile.write('\n'.join(gen_lookup_func('y', dimsize)))

def main():
    args = get_options()
    random.seed(args.seed)
    generators = {'xy': gen_xy, 'yx': gen_yx, 'spiral':gen_spiral, 'random': gen_random}
    gen_vheader(args.dimsize, generators[args.pattern], args.pattern, 'mesh_node_mapping.vh')

if __name__ == '__main__':
    main()
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v
new file mode 100644
index 000000000..e0338347b
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_multi_sw.v
@@ -0,0 +1,481 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: mesh_2d_dor_router_multi_sw
// Description:
//   Alternate implementation for mesh_2d_dor_router_single_sw with
//   multiple switches for independent paths between inputs and outputs
//   **NOTE**: This module has not been validated

module mesh_2d_dor_router_multi_sw #(
  parameter                        WIDTH          = 64,
  parameter                        DIM_SIZE       = 4,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X      = 0,
  parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y      = 0,
  parameter                        TERM_BUFF_SIZE = 5,
  parameter                        XB_BUFF_SIZE   = 5,
  parameter                        ROUTING_ALLOC  = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH}
  parameter                        SWITCH_ALLOC   = "PRIO"      // Switch allocation algorithm {ROUND-ROBIN, PRIO}
) (
  // Clocks and resets
  input  wire             clk,
  input  wire             reset,

  // Terminal connections
  input  wire [WIDTH-1:0] s_axis_ter_tdata,
  input  wire             s_axis_ter_tlast,
  input  wire             s_axis_ter_tvalid,
  output wire             s_axis_ter_tready,
  output wire [WIDTH-1:0] m_axis_ter_tdata,
  output wire             m_axis_ter_tlast,
  output wire             m_axis_ter_tvalid,
  input  wire             m_axis_ter_tready,

  // West inter-router connections
  input  wire [WIDTH-1:0] s_axis_wst_tdata,
  input  wire [0:0]       s_axis_wst_tdest,
  input  wire             s_axis_wst_tlast,
  input  wire             s_axis_wst_tvalid,
  output wire             s_axis_wst_tready,
  output wire [WIDTH-1:0] m_axis_wst_tdata,
  output wire [0:0]       m_axis_wst_tdest,
  output wire             m_axis_wst_tlast,
  output wire             m_axis_wst_tvalid,
  input  wire             m_axis_wst_tready,

  // East inter-router connections
  input  wire [WIDTH-1:0] s_axis_est_tdata,
  input  wire [0:0]       s_axis_est_tdest,
  input  wire             s_axis_est_tlast,
  input  wire             s_axis_est_tvalid,
  output wire             s_axis_est_tready,
  output wire [WIDTH-1:0] m_axis_est_tdata,
  output wire [0:0]       m_axis_est_tdest,
  output wire             m_axis_est_tlast,
  output wire             m_axis_est_tvalid,
  input  wire             m_axis_est_tready,

  // North inter-router connections
  input  wire [WIDTH-1:0] s_axis_nor_tdata,
  input  wire [0:0]       s_axis_nor_tdest,
  input  wire             s_axis_nor_tlast,
  input  wire             s_axis_nor_tvalid,
  output wire             s_axis_nor_tready,
  output wire [WIDTH-1:0] m_axis_nor_tdata,
  output wire [0:0]       m_axis_nor_tdest,
  output wire             m_axis_nor_tlast,
  output wire             m_axis_nor_tvalid,
  input  wire             m_axis_nor_tready,

  // South inter-router connections
  input  wire [WIDTH-1:0] s_axis_sou_tdata,
  input  wire [0:0]       s_axis_sou_tdest,
  input  wire             s_axis_sou_tlast,
  input  wire             s_axis_sou_tvalid,
  output wire             s_axis_sou_tready,
  output wire [WIDTH-1:0] m_axis_sou_tdata,
  output wire [0:0]       m_axis_sou_tdest,
  output wire             m_axis_sou_tlast,
  output wire             m_axis_sou_tvalid,
  input  wire             m_axis_sou_tready
);
  // -------------------------------------------------
  // Routing functions
  // -------------------------------------------------
  // The include provides node_to_xdst / node_to_ydst, which are used below to
  // obtain the destination coordinates from a packet header.
  `include "mesh_node_mapping.vh"

  // Route select for packets entering from the terminal port.
  // Encoding: 0=TER, 1=WST, 2=EST, 3=NOR, 4=SOU. The X coordinate is resolved
  // first; Y is only considered once X matches this router.
  function [2:0] term_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
  begin
    xdst  = node_to_xdst(header);
    ydst  = node_to_ydst(header);
    xdiff = xdst - XB_ADDR_X;
    ydiff = ydst - XB_ADDR_Y;
    // Routing logic
    if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
      term_route = 3'd0;    //TER
    end else if (xdst == XB_ADDR_X) begin
      if (ydiff < 0)
        term_route = 3'd3;  //NOR
      else
        term_route = 3'd4;  //SOU
    end else begin
      if (xdiff < 0)
        term_route = 3'd1;  //WST
      else
        term_route = 3'd2;  //EST
    end
  end
  endfunction

  // Route select for packets entering from the west or east port.
  // Encoding: 0=TER, 1=forward along X, 2=NOR, 3=SOU.
  function [1:0] xdim_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
  begin
    xdst  = node_to_xdst(header);
    ydst  = node_to_ydst(header);
    xdiff = xdst - XB_ADDR_X;
    ydiff = ydst - XB_ADDR_Y;
    // Routing logic
    if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
      xdim_route = 2'd0;    //TER
    end else if (xdst == XB_ADDR_X) begin
      if (ydiff < 0)
        xdim_route = 2'd2;  //NOR
      else
        xdim_route = 2'd3;  //SOU
    end else begin
      xdim_route = 2'd1;    //Forward
    end
  end
  endfunction

  // Route select for packets entering from the north or south port.
  // Encoding: 0=TER, 1=forward along Y.
  function [0:0] ydim_route;
    input [WIDTH-1:0] header;
    reg [$clog2(DIM_SIZE)-1:0] xdst, ydst;
    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
  begin
    xdst  = node_to_xdst(header);
    ydst  = node_to_ydst(header);
    xdiff = xdst - XB_ADDR_X;
    ydiff = ydst - XB_ADDR_Y;
    // Routing logic
    if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin
      ydim_route = 1'd0;    //TER
    end else begin
      // Everything not addressed to this router is forwarded. The original
      // only assigned a value when xdst == XB_ADDR_X, which left the function
      // result unassigned for any other header.
      ydim_route = 1'd1;    //Forward
    end
  end
  endfunction


  // -------------------------------------------------
  // Input buffers
  // -------------------------------------------------
  wire [WIDTH-1:0] ter_i_tdata;
  wire             ter_i_tlast;
  wire             ter_i_tvalid;
  wire             ter_i_tready;

  axi_packet_gate #(
    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
  ) term_in_pkt_gate_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  (s_axis_ter_tdata),
    .i_tlast  (s_axis_ter_tlast),
    .i_tvalid (s_axis_ter_tvalid),
    .i_tready (s_axis_ter_tready),
    .i_terror (1'b0),
    .o_tdata  (ter_i_tdata),
    .o_tlast  (ter_i_tlast),
    .o_tvalid (ter_i_tvalid),
    .o_tready (ter_i_tready)
  );

  wire [WIDTH-1:0] wst_i_tdata,  est_i_tdata,  nor_i_tdata,  sou_i_tdata;
  wire             wst_i_tlast,  est_i_tlast,  nor_i_tlast,  sou_i_tlast;
  wire             wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
  wire             wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) wst_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_wst_tdata),
    .s_axis_tdest  (s_axis_wst_tdest),
    .s_axis_tlast  (s_axis_wst_tlast),
    .s_axis_tvalid (s_axis_wst_tvalid),
    .s_axis_tready (s_axis_wst_tready),
    .m_axis_tdata  (wst_i_tdata),
    .m_axis_tlast  (wst_i_tlast),
    .m_axis_tvalid (wst_i_tvalid),
    .m_axis_tready (wst_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) est_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_est_tdata),
    .s_axis_tdest  (s_axis_est_tdest),
    .s_axis_tlast  (s_axis_est_tlast),
    .s_axis_tvalid (s_axis_est_tvalid),
    .s_axis_tready (s_axis_est_tready),
    .m_axis_tdata  (est_i_tdata),
    .m_axis_tlast  (est_i_tlast),
    .m_axis_tvalid (est_i_tvalid),
    .m_axis_tready (est_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) nor_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_nor_tdata),
    .s_axis_tdest  (s_axis_nor_tdest),
    .s_axis_tlast  (s_axis_nor_tlast),
    .s_axis_tvalid (s_axis_nor_tvalid),
    .s_axis_tready (s_axis_nor_tready),
    .m_axis_tdata  (nor_i_tdata),
    .m_axis_tlast  (nor_i_tlast),
    .m_axis_tvalid (nor_i_tvalid),
    .m_axis_tready (nor_i_tready)
  );

  axis_ingress_vc_buff #(
    .WIDTH(WIDTH), .NUM_VCS(1),
    .SIZE(XB_BUFF_SIZE),
    .ROUTING(ROUTING_ALLOC)
  ) sou_in_vc_buf_i (
    .clk           (clk),
    .reset         (reset),
    .s_axis_tdata  (s_axis_sou_tdata),
    .s_axis_tdest  (s_axis_sou_tdest),
    .s_axis_tlast  (s_axis_sou_tlast),
    .s_axis_tvalid (s_axis_sou_tvalid),
    .s_axis_tready (s_axis_sou_tready),
    .m_axis_tdata  (sou_i_tdata),
    .m_axis_tlast  (sou_i_tlast),
    .m_axis_tvalid (sou_i_tvalid),
    .m_axis_tready (sou_i_tready)
  );

  // -------------------------------------------------
  // Input demuxes
  // -------------------------------------------------
  // Link naming: <src>2<dst> with t=terminal, w=west, e=east, n=north, s=south.
  // Each demux output list is ordered so that the slot selected by route
  // value 0 is the right-most (least-significant) entry.

  wire [WIDTH-1:0] t2t_tdata,  t2w_tdata,  t2e_tdata,  t2n_tdata,  t2s_tdata;
  wire             t2t_tlast,  t2w_tlast,  t2e_tlast,  t2n_tlast,  t2s_tlast;
  wire             t2t_tvalid, t2w_tvalid, t2e_tvalid, t2n_tvalid, t2s_tvalid;
  wire             t2t_tready, t2w_tready, t2e_tready, t2n_tready, t2s_tready;

  wire [WIDTH-1:0] w2t_tdata,  w2e_tdata,  w2n_tdata,  w2s_tdata;
  wire             w2t_tlast,  w2e_tlast,  w2n_tlast,  w2s_tlast;
  wire             w2t_tvalid, w2e_tvalid, w2n_tvalid, w2s_tvalid;
  wire             w2t_tready, w2e_tready, w2n_tready, w2s_tready;

  wire [WIDTH-1:0] e2t_tdata,  e2w_tdata,  e2n_tdata,  e2s_tdata;
  wire             e2t_tlast,  e2w_tlast,  e2n_tlast,  e2s_tlast;
  wire             e2t_tvalid, e2w_tvalid, e2n_tvalid, e2s_tvalid;
  wire             e2t_tready, e2w_tready, e2n_tready, e2s_tready;

  wire [WIDTH-1:0] n2t_tdata,  n2s_tdata;
  wire             n2t_tlast,  n2s_tlast;
  wire             n2t_tvalid, n2s_tvalid;
  wire             n2t_tready, n2s_tready;

  wire [WIDTH-1:0] s2t_tdata,  s2n_tdata;
  wire             s2t_tlast,  s2n_tlast;
  wire             s2t_tvalid, s2n_tvalid;
  wire             s2t_tready, s2n_tready;

  wire [WIDTH-1:0] ter_i_hdr, wst_i_hdr, est_i_hdr, nor_i_hdr, sou_i_hdr;

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(5),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) ter_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (ter_i_hdr),
    .dest     (term_route(ter_i_hdr)),
    .i_tdata  (ter_i_tdata),
    .i_tlast  (ter_i_tlast),
    .i_tvalid (ter_i_tvalid),
    .i_tready (ter_i_tready),
    .o_tdata  ({t2s_tdata,  t2n_tdata,  t2e_tdata,  t2w_tdata,  t2t_tdata}),
    .o_tlast  ({t2s_tlast,  t2n_tlast,  t2e_tlast,  t2w_tlast,  t2t_tlast}),
    .o_tvalid ({t2s_tvalid, t2n_tvalid, t2e_tvalid, t2w_tvalid, t2t_tvalid}),
    .o_tready ({t2s_tready, t2n_tready, t2e_tready, t2w_tready, t2t_tready})
  );

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) wst_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (wst_i_hdr),
    .dest     (xdim_route(wst_i_hdr)),
    .i_tdata  (wst_i_tdata),
    .i_tlast  (wst_i_tlast),
    .i_tvalid (wst_i_tvalid),
    .i_tready (wst_i_tready),
    .o_tdata  ({w2s_tdata,  w2n_tdata,  w2e_tdata,  w2t_tdata}),
    .o_tlast  ({w2s_tlast,  w2n_tlast,  w2e_tlast,  w2t_tlast}),
    .o_tvalid ({w2s_tvalid, w2n_tvalid, w2e_tvalid, w2t_tvalid}),
    .o_tready ({w2s_tready, w2n_tready, w2e_tready, w2t_tready})
  );

  axi_demux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) est_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (est_i_hdr),
    .dest     (xdim_route(est_i_hdr)),
    .i_tdata  (est_i_tdata),
    .i_tlast  (est_i_tlast),
    .i_tvalid (est_i_tvalid),
    .i_tready (est_i_tready),
    .o_tdata  ({e2s_tdata,  e2n_tdata,  e2w_tdata,  e2t_tdata}),
    .o_tlast  ({e2s_tlast,  e2n_tlast,  e2w_tlast,  e2t_tlast}),
    .o_tvalid ({e2s_tvalid, e2n_tvalid, e2w_tvalid, e2t_tvalid}),
    .o_tready ({e2s_tready, e2n_tready, e2w_tready, e2t_tready})
  );

  // ydim_route encodes 0=TER and 1=forward, so the terminal link must sit in
  // slot 0 (right-most), as in the ter/wst/est demuxes above. The original
  // order {n2t, n2s} had the two outputs swapped.
  // NOTE(review): confirm the output slot numbering against axi_demux.
  axi_demux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) nor_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (nor_i_hdr),
    .dest     (ydim_route(nor_i_hdr)),
    .i_tdata  (nor_i_tdata),
    .i_tlast  (nor_i_tlast),
    .i_tvalid (nor_i_tvalid),
    .i_tready (nor_i_tready),
    .o_tdata  ({n2s_tdata,  n2t_tdata}),
    .o_tlast  ({n2s_tlast,  n2t_tlast}),
    .o_tvalid ({n2s_tvalid, n2t_tvalid}),
    .o_tready ({n2s_tready, n2t_tready})
  );

  // Same slot ordering as nor_i_demux_i: slot 0 = terminal, slot 1 = forward.
  // The original order {s2t, s2n} had the two outputs swapped.
  axi_demux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0)
  ) sou_i_demux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .header   (sou_i_hdr),
    .dest     (ydim_route(sou_i_hdr)),
    .i_tdata  (sou_i_tdata),
    .i_tlast  (sou_i_tlast),
    .i_tvalid (sou_i_tvalid),
    .i_tready (sou_i_tready),
    .o_tdata  ({s2n_tdata,  s2t_tdata}),
    .o_tlast  ({s2n_tlast,  s2t_tlast}),
    .o_tvalid ({s2n_tvalid, s2t_tvalid}),
    .o_tready ({s2n_tready, s2t_tready})
  );

  // -------------------------------------------------
  // Output muxes
  // -------------------------------------------------
  // One mux per output port merges every link that can reach that port.
  // All outgoing tdest signals are tied to 0.

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(5),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) ter_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2t_tdata,  w2t_tdata,  e2t_tdata,  n2t_tdata,  s2t_tdata}),
    .i_tlast  ({t2t_tlast,  w2t_tlast,  e2t_tlast,  n2t_tlast,  s2t_tlast}),
    .i_tvalid ({t2t_tvalid, w2t_tvalid, e2t_tvalid, n2t_tvalid, s2t_tvalid}),
    .i_tready ({t2t_tready, w2t_tready, e2t_tready, n2t_tready, s2t_tready}),
    .o_tdata  (m_axis_ter_tdata),
    .o_tlast  (m_axis_ter_tlast),
    .o_tvalid (m_axis_ter_tvalid),
    .o_tready (m_axis_ter_tready)
  );

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) wst_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2w_tdata,  e2w_tdata}),
    .i_tlast  ({t2w_tlast,  e2w_tlast}),
    .i_tvalid ({t2w_tvalid, e2w_tvalid}),
    .i_tready ({t2w_tready, e2w_tready}),
    .o_tdata  (m_axis_wst_tdata),
    .o_tlast  (m_axis_wst_tlast),
    .o_tvalid (m_axis_wst_tvalid),
    .o_tready (m_axis_wst_tready)
  );
  assign m_axis_wst_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(2),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) est_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2e_tdata,  w2e_tdata}),
    .i_tlast  ({t2e_tlast,  w2e_tlast}),
    .i_tvalid ({t2e_tvalid, w2e_tvalid}),
    .i_tready ({t2e_tready, w2e_tready}),
    .o_tdata  (m_axis_est_tdata),
    .o_tlast  (m_axis_est_tlast),
    .o_tvalid (m_axis_est_tvalid),
    .o_tready (m_axis_est_tready)
  );
  assign m_axis_est_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) nor_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2n_tdata,  w2n_tdata,  e2n_tdata,  s2n_tdata}),
    .i_tlast  ({t2n_tlast,  w2n_tlast,  e2n_tlast,  s2n_tlast}),
    .i_tvalid ({t2n_tvalid, w2n_tvalid, e2n_tvalid, s2n_tvalid}),
    .i_tready ({t2n_tready, w2n_tready, e2n_tready, s2n_tready}),
    .o_tdata  (m_axis_nor_tdata),
    .o_tlast  (m_axis_nor_tlast),
    .o_tvalid (m_axis_nor_tvalid),
    .o_tready (m_axis_nor_tready)
  );
  assign m_axis_nor_tdest = 1'b0;

  axi_mux #(
    .WIDTH(WIDTH), .SIZE(4),
    .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1)
  ) sou_o_mux_i (
    .clk      (clk),
    .reset    (reset),
    .clear    (1'b0),
    .i_tdata  ({t2s_tdata,  w2s_tdata,  e2s_tdata,  n2s_tdata}),
    .i_tlast  ({t2s_tlast,  w2s_tlast,  e2s_tlast,  n2s_tlast}),
    .i_tvalid ({t2s_tvalid, w2s_tvalid, e2s_tvalid, n2s_tvalid}),
    .i_tready ({t2s_tready, w2s_tready, e2s_tready, n2s_tready}),
    .o_tdata  (m_axis_sou_tdata),
    .o_tlast  (m_axis_sou_tlast),
    .o_tvalid (m_axis_sou_tvalid),
    .o_tready (m_axis_sou_tready)
  );
  assign m_axis_sou_tdest = 1'b0;

endmodule

diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
new file mode 100644
index 000000000..65cded545
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_2d_dor_router_single_sw.v
@@ -0,0 +1,398 @@
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: mesh_2d_dor_router_single_sw
// Description:
//   This module implements the router for a 2-dimensional (2d)
//   mesh network that uses dimension order routing (dor) and has a
//   single underlying switch (single_sw). It uses AXI-Stream for all of its
//   links.
//   The mesh topology, routing algorithms and the router architecture is
//   described in README.md in this directory.
+// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers alone one dimension +// - XB_ADDR_X: The X-coordinate of this router in the topology +// - XB_ADDR_Y: The Y-coordinate of this router in the topology +// - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// Signals: +// - *_axis_ter_*: Terminal ports (master/slave) +// - *_axis_wst_*: Inter-router X-dim west connections (master/slave) +// - *_axis_est_*: Inter-router X-dim east connections (master/slave) +// - *_axis_nor_*: Inter-router X-dim north connections (master/slave) +// - *_axis_sou_*: Inter-router X-dim south connections (master/slave) +// + +module mesh_2d_dor_router_single_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", // Routing (switching) method {WORMHOLE, CUT-THROUGH} + parameter SWITCH_ALLOC = "PRIO" // Switch allocation algorithm {ROUND-ROBIN, PRIO} +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_ter_tdata, + input wire s_axis_ter_tlast, + input wire s_axis_ter_tvalid, + output wire s_axis_ter_tready, + output wire [WIDTH-1:0] m_axis_ter_tdata, + output wire m_axis_ter_tlast, + output wire m_axis_ter_tvalid, + input wire m_axis_ter_tready, + + // West inter-router connections + input wire [WIDTH-1:0] 
s_axis_wst_tdata, + input wire [0:0] s_axis_wst_tdest, + input wire s_axis_wst_tlast, + input wire s_axis_wst_tvalid, + output wire s_axis_wst_tready, + output wire [WIDTH-1:0] m_axis_wst_tdata, + output wire [0:0] m_axis_wst_tdest, + output wire m_axis_wst_tlast, + output wire m_axis_wst_tvalid, + input wire m_axis_wst_tready, + + // East inter-router connections + input wire [WIDTH-1:0] s_axis_est_tdata, + input wire [0:0] s_axis_est_tdest, + input wire s_axis_est_tlast, + input wire s_axis_est_tvalid, + output wire s_axis_est_tready, + output wire [WIDTH-1:0] m_axis_est_tdata, + output wire [0:0] m_axis_est_tdest, + output wire m_axis_est_tlast, + output wire m_axis_est_tvalid, + input wire m_axis_est_tready, + + // North inter-router connections + input wire [WIDTH-1:0] s_axis_nor_tdata, + input wire [0:0] s_axis_nor_tdest, + input wire s_axis_nor_tlast, + input wire s_axis_nor_tvalid, + output wire s_axis_nor_tready, + output wire [WIDTH-1:0] m_axis_nor_tdata, + output wire [0:0] m_axis_nor_tdest, + output wire m_axis_nor_tlast, + output wire m_axis_nor_tvalid, + input wire m_axis_nor_tready, + + // South inter-router connections + input wire [WIDTH-1:0] s_axis_sou_tdata, + input wire [0:0] s_axis_sou_tdest, + input wire s_axis_sou_tlast, + input wire s_axis_sou_tvalid, + output wire s_axis_sou_tready, + output wire [WIDTH-1:0] m_axis_sou_tdata, + output wire [0:0] m_axis_sou_tdest, + output wire m_axis_sou_tlast, + output wire m_axis_sou_tvalid, + input wire m_axis_sou_tready +); + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + + // mesh_node_mapping.vh file contains the mapping between the node number + // and its XY coordinates. It is autogenerated and defines the node_to_xdst + // and node_to_ydst functions. 
+  `include "mesh_node_mapping.vh"
+
+  // Switch port encodings. These values are also used as bit positions in
+  // the pkt_waiting vector passed to compute_switch_alloc.
+  localparam [2:0] SW_DEST_TER  = 3'd0;
+  localparam [2:0] SW_DEST_WST  = 3'd1;
+  localparam [2:0] SW_DEST_EST  = 3'd2;
+  localparam [2:0] SW_DEST_NOR  = 3'd3;
+  localparam [2:0] SW_DEST_SOU  = 3'd4;
+  localparam [2:0] SW_NUM_DESTS = 3'd5;
+
+  // The compute_switch_tdest function is the destination selector
+  // i.e. it will inspect the first word of a packet, translate the node
+  // number in it to (x, y) coordinates using node_to_xdst/node_to_ydst
+  // (from mesh_node_mapping.vh) and determine the destination of the packet.
+  //
+  // Arguments:
+  //   header : first word of the packet
+  //   src    : switch port (SW_DEST_*) on which the packet arrived
+  // Returns:
+  //   {vc, port} where the MSB is the VC and the 3 LSBs are the switch
+  //   port (SW_DEST_*) that the packet must leave on.
+  function [3:0] compute_switch_tdest;
+    input [WIDTH-1:0] header;
+    input [3:0]       src;
+    reg [$clog2(DIM_SIZE)-1:0]      xdst, ydst;
+    reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff;
+    reg                             vc;
+    reg [2:0]                       port;
+    begin
+      xdst  = node_to_xdst(header);
+      ydst  = node_to_ydst(header);
+      xdiff = xdst - XB_ADDR_X;
+      ydiff = ydst - XB_ADDR_Y;
+      // Routing logic
+      // - The X offset is resolved first (east/west), then the Y offset
+      // - NOTE(review): an earlier comment here said "VC in a mesh is
+      //   always 0", but the code selects VC=1 for CCW turns onto the
+      //   north/south links. Confirm which one is intended.
+      if (xdiff == 'd0 && ydiff == 'd0) begin
+        // VC=0 because terminals don't have VCs
+        vc   = 1'b0;
+        port = SW_DEST_TER;
+      end else if (xdiff == 'd0) begin
+        // VC=1 for CCW turns and VC=0 for everything else
+        if (ydiff < 0) begin
+          vc   = (src == SW_DEST_WST);
+          port = SW_DEST_NOR;
+        end else begin
+          vc   = (src == SW_DEST_EST);
+          port = SW_DEST_SOU;
+        end
+      end else begin
+        // VC=0 because east-west paths don't have VCs
+        vc = 1'b0;
+        if (xdiff < 0)
+          port = SW_DEST_WST;
+        else
+          port = SW_DEST_EST;
+      end
+      compute_switch_tdest = {vc, port};
+
+      // Simulation-only sanity check: a router on the edge of the mesh must
+      // never route a packet off that edge.
+      // - The guard uses reduction-XOR with !== because a logical compare
+      //   against 'hx always evaluates to X (i.e. false), which would
+      //   silently disable the check.
+      // - Only the port field is compared; the VC bit is not part of the
+      //   direction.
+      // synthesis translate_off
+      if ((^xdst) !== 1'bx && (^ydst) !== 1'bx) begin
+        if (XB_ADDR_X == 0 && port == SW_DEST_WST)
+          $display("Illegal route chosen: WEST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_X == DIM_SIZE-1 && port == SW_DEST_EST)
+          $display("Illegal route chosen: EAST. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_Y == 0 && port == SW_DEST_NOR)
+          $display("Illegal route chosen: NORTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+        if (XB_ADDR_Y == DIM_SIZE-1 && port == SW_DEST_SOU)
+          $display("Illegal route chosen: SOUTH. xdst=%d, ydst=%d, xaddr=%d, yaddr=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y);
+      end
+      // synthesis translate_on
+      //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest);
+    end
+  endfunction
+
+  // The compute_switch_alloc function is the switch allocation function
+  // i.e. it chooses which input port reserves the switch for packet transfer.
+  // After the switch is allocated, all other ports will be backpressured until
+  // the packet finishes transferring.
+  //
+  // Arguments:
+  //   pkt_waiting : one bit per input port (indexed by SW_DEST_*), set when
+  //                 that port has a packet waiting
+  //   last_alloc  : port that was granted the switch for the previous packet
+  // Returns:
+  //   The port (SW_DEST_*) to grant. When nothing is waiting the result is
+  //   SW_DEST_TER.
+  function [2:0] compute_switch_alloc;
+    input [4:0] pkt_waiting;
+    input [2:0] last_alloc;
+    // Round-robin candidates. These are 4 bits wide so that last_alloc + 4
+    // (max 4 + 4 = 8) does not wrap before the modulo is applied. In 3-bit
+    // arithmetic (4 + 4) % 5 evaluates to 0 instead of 3, which skipped the
+    // NOR port whenever the last grant went to SOU.
+    reg [3:0] rr1, rr2, rr3, rr4;
+    begin
+      if (pkt_waiting == 5'b00000) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00001) begin
+        compute_switch_alloc = SW_DEST_TER;
+      end else if (pkt_waiting == 5'b00010) begin
+        compute_switch_alloc = SW_DEST_WST;
+      end else if (pkt_waiting == 5'b00100) begin
+        compute_switch_alloc = SW_DEST_EST;
+      end else if (pkt_waiting == 5'b01000) begin
+        compute_switch_alloc = SW_DEST_NOR;
+      end else if (pkt_waiting == 5'b10000) begin
+        compute_switch_alloc = SW_DEST_SOU;
+      end else begin
+        // More than one port is waiting: arbitrate
+        if (SWITCH_ALLOC == "PRIO") begin
+          // Priority: South > East > North > West > Term
+          if (pkt_waiting[SW_DEST_SOU])
+            compute_switch_alloc = SW_DEST_SOU;
+          else if (pkt_waiting[SW_DEST_EST])
+            compute_switch_alloc = SW_DEST_EST;
+          else if (pkt_waiting[SW_DEST_NOR])
+            compute_switch_alloc = SW_DEST_NOR;
+          else if (pkt_waiting[SW_DEST_WST])
+            compute_switch_alloc = SW_DEST_WST;
+          else
+            compute_switch_alloc = SW_DEST_TER;
+        end else begin
+          // Round-robin: probe the ports after last_alloc in increasing
+          // order (wrapping around) and grant the first one that is waiting.
+          rr1 = ({1'b0, last_alloc} + 4'd1) % SW_NUM_DESTS;
+          rr2 = ({1'b0, last_alloc} + 4'd2) % SW_NUM_DESTS;
+          rr3 = ({1'b0, last_alloc} + 4'd3) % SW_NUM_DESTS;
+          rr4 = ({1'b0, last_alloc} + 4'd4) % SW_NUM_DESTS;
+          if (pkt_waiting[rr1])
+            compute_switch_alloc = rr1[2:0];
+          else if (pkt_waiting[rr2])
+            compute_switch_alloc = rr2[2:0];
+          else if (pkt_waiting[rr3])
+            compute_switch_alloc = rr3[2:0];
+          else if (pkt_waiting[rr4])
+            compute_switch_alloc = rr4[2:0];
+          else
+            compute_switch_alloc = last_alloc;
+        end
+      end
+      //$display("pkt_waiting=%b, alloc=%d, last_alloc=%d", pkt_waiting, compute_switch_alloc, last_alloc);
+    end
+  endfunction
+
+  // -------------------------------------------------
+  // Input buffers
+  // -------------------------------------------------
+  wire [WIDTH-1:0] ter_i_tdata;
+  wire [3:0]       ter_i_tdest;
+  wire             ter_i_tlast;
+  wire             ter_i_tvalid;
+  wire             ter_i_tready;
+
+  // Data coming in from the terminal is gated until a full packet arrives
+  // in order to minimize the switch allocation time per packet.
+  axi_packet_gate #(
+    .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE)
+  ) term_in_pkt_gate_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  (s_axis_ter_tdata),
+    .i_tlast  (s_axis_ter_tlast),
+    .i_tvalid (s_axis_ter_tvalid),
+    .i_tready (s_axis_ter_tready),
+    .i_terror (1'b0),
+    .o_tdata  (ter_i_tdata),
+    .o_tlast  (ter_i_tlast),
+    .o_tvalid (ter_i_tvalid),
+    .o_tready (ter_i_tready)
+  );
+  assign ter_i_tdest = compute_switch_tdest(ter_i_tdata, SW_DEST_TER);
+
+  wire [WIDTH-1:0] wst_i_tdata,  est_i_tdata,  nor_i_tdata,  sou_i_tdata;
+  wire [3:0]       wst_i_tdest,  est_i_tdest,  nor_i_tdest,  sou_i_tdest;
+  wire             wst_i_tlast,  est_i_tlast,  nor_i_tlast,  sou_i_tlast;
+  wire             wst_i_tvalid, est_i_tvalid, nor_i_tvalid, sou_i_tvalid;
+  wire             wst_i_tready, est_i_tready, nor_i_tready, sou_i_tready;
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(1),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) wst_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_wst_tdata),
+    .s_axis_tdest  (s_axis_wst_tdest),
+    .s_axis_tlast  (s_axis_wst_tlast),
+    .s_axis_tvalid (s_axis_wst_tvalid),
+    .s_axis_tready (s_axis_wst_tready),
+    .m_axis_tdata  (wst_i_tdata),
+    .m_axis_tlast  (wst_i_tlast),
+    .m_axis_tvalid (wst_i_tvalid),
+    .m_axis_tready (wst_i_tready)
+  );
+  // The switch destination for each input is recomputed from the word that
+  // is currently at the output of that input's buffer.
+  assign wst_i_tdest = compute_switch_tdest(wst_i_tdata, SW_DEST_WST);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(1),
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) est_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_est_tdata),
+    .s_axis_tdest  (s_axis_est_tdest),
+    .s_axis_tlast  (s_axis_est_tlast),
+    .s_axis_tvalid (s_axis_est_tvalid),
+    .s_axis_tready (s_axis_est_tready),
+    .m_axis_tdata  (est_i_tdata),
+    .m_axis_tlast  (est_i_tlast),
+    .m_axis_tvalid (est_i_tvalid),
+    .m_axis_tready (est_i_tready)
+  );
+  assign est_i_tdest = compute_switch_tdest(est_i_tdata, SW_DEST_EST);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) nor_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_nor_tdata),
+    .s_axis_tdest  (s_axis_nor_tdest),
+    .s_axis_tlast  (s_axis_nor_tlast),
+    .s_axis_tvalid (s_axis_nor_tvalid),
+    .s_axis_tready (s_axis_nor_tready),
+    .m_axis_tdata  (nor_i_tdata),
+    .m_axis_tlast  (nor_i_tlast),
+    .m_axis_tvalid (nor_i_tvalid),
+    .m_axis_tready (nor_i_tready)
+  );
+  assign nor_i_tdest = compute_switch_tdest(nor_i_tdata, SW_DEST_NOR);
+
+  axis_ingress_vc_buff #(
+    .WIDTH(WIDTH), .NUM_VCS(2), // Only north-south traffic has VCs
+    .SIZE(XB_BUFF_SIZE),
+    .ROUTING(ROUTING_ALLOC)
+  ) sou_in_vc_buf_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  (s_axis_sou_tdata),
+    .s_axis_tdest  (s_axis_sou_tdest),
+    .s_axis_tlast  (s_axis_sou_tlast),
+    .s_axis_tvalid (s_axis_sou_tvalid),
+    .s_axis_tready (s_axis_sou_tready),
+    .m_axis_tdata  (sou_i_tdata),
+    .m_axis_tlast  (sou_i_tlast),
+    .m_axis_tvalid (sou_i_tvalid),
+    .m_axis_tready (sou_i_tready)
+  );
+  assign sou_i_tdest = compute_switch_tdest(sou_i_tdata, SW_DEST_SOU);
+
+  //-------------------------------------------------
+  // Switch
+  //-------------------------------------------------
+  // Track the input packet state
+  // - PKT_ST_HEAD: the next word transferred is the first word of a packet
+  // - PKT_ST_BODY: a packet transfer is in progress
+  localparam [0:0] PKT_ST_HEAD = 1'b0;
+  localparam [0:0] PKT_ST_BODY = 1'b1;
+  reg [0:0] pkt_state = PKT_ST_HEAD;
+
+  // The switch only accept packets on a single port at a time.
+  // The handshake signals of all five inputs are therefore OR-reduced into
+  // a single "a word was transferred" / "it was the last word" indication.
+  // NOTE(review): sw_in_last is the OR of (tlast & tvalid) over ALL inputs,
+  // not only the one that currently owns the switch. If a stalled input is
+  // presenting a single-word packet (tvalid and tlast both high) while
+  // another input is mid-packet, sw_in_last would assert early. Presumably
+  // packets on these links are always longer than one word -- TODO confirm.
+  wire sw_in_ready = |({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready});
+  wire sw_in_valid = |({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid});
+  wire sw_in_last  = |({sou_i_tlast & sou_i_tvalid, nor_i_tlast & nor_i_tvalid,
+                        est_i_tlast & est_i_tvalid, wst_i_tlast & wst_i_tvalid,
+                        ter_i_tlast & ter_i_tvalid});
+
+  // Advance the packet state on every transferred word: return to HEAD
+  // after the last word, otherwise stay in / move to BODY.
+  always @(posedge clk) begin
+    if (reset) begin
+      pkt_state <= PKT_ST_HEAD;
+    end else if (sw_in_valid & sw_in_ready) begin
+      pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY;
+    end
+  end
+
+  // The switch requires the allocation to stay valid until the
+  // end of the packet. We also might need to keep the previous
+  // packet's allocation to compute the current one
+  // - pkt_switch_alloc : allocation latched on the first word of a packet
+  // - prev_switch_alloc: allocation latched on the last word of a packet;
+  //                      passed to compute_switch_alloc as last_alloc
+  wire [2:0] switch_alloc;
+  reg  [2:0] prev_switch_alloc = SW_DEST_TER;
+  reg  [2:0] pkt_switch_alloc  = SW_DEST_TER;
+
+  always @(posedge clk) begin
+    if (reset) begin
+      prev_switch_alloc <= SW_DEST_TER;
+      pkt_switch_alloc <= SW_DEST_TER;
+    end else if (sw_in_valid & sw_in_ready) begin
+      if (pkt_state == PKT_ST_HEAD)
+        pkt_switch_alloc <= switch_alloc;
+      if (sw_in_last)
+        prev_switch_alloc <= switch_alloc;
+    end
+  end
+
+  // While waiting for / transferring the first word of a packet the
+  // allocation is computed combinationally from the inputs that have data;
+  // for the rest of the packet the latched value is used.
+  assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ?
+    compute_switch_alloc({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}, prev_switch_alloc) :
+    pkt_switch_alloc;
+
+  // All port vectors are concatenated as {south, north, east, west, terminal}
+  // so that the terminal ends up at index 0 and south at index 4.
+  // The tdest bit coming out of the switch for the terminal port is not
+  // connected to anything else (ter_tdest_discard).
+  wire ter_tdest_discard;
+  axis_switch #(
+    .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(5), .OUT_PORTS(5)
+  ) switch_i (
+    .clk           (clk),
+    .reset         (reset),
+    .s_axis_tdata  ({sou_i_tdata , nor_i_tdata , est_i_tdata , wst_i_tdata , ter_i_tdata }),
+    .s_axis_tdest  ({sou_i_tdest , nor_i_tdest , est_i_tdest , wst_i_tdest , ter_i_tdest }),
+    .s_axis_tlast  ({sou_i_tlast , nor_i_tlast , est_i_tlast , wst_i_tlast , ter_i_tlast }),
+    .s_axis_tvalid ({sou_i_tvalid, nor_i_tvalid, est_i_tvalid, wst_i_tvalid, ter_i_tvalid}),
+    .s_axis_tready ({sou_i_tready, nor_i_tready, est_i_tready, wst_i_tready, ter_i_tready}),
+    .s_axis_alloc  (switch_alloc),
+    .m_axis_tdata  ({m_axis_sou_tdata, m_axis_nor_tdata, m_axis_est_tdata, m_axis_wst_tdata, m_axis_ter_tdata }),
+    .m_axis_tdest  ({m_axis_sou_tdest, m_axis_nor_tdest, m_axis_est_tdest, m_axis_wst_tdest, ter_tdest_discard}),
+    .m_axis_tlast  ({m_axis_sou_tlast, m_axis_nor_tlast, m_axis_est_tlast, m_axis_wst_tlast, m_axis_ter_tlast }),
+    .m_axis_tvalid ({m_axis_sou_tvalid, m_axis_nor_tvalid, m_axis_est_tvalid, m_axis_wst_tvalid, m_axis_ter_tvalid}),
+    .m_axis_tready ({m_axis_sou_tready, m_axis_nor_tready, m_axis_est_tready, m_axis_wst_tready, m_axis_ter_tready})
+  );
+
+
+endmodule
+
diff --git a/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
new file mode 100644
index 000000000..466b0c615
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/crossbar/mesh_node_mapping.vh
@@ -0,0 +1,294 @@
+// Copyright 2018 Ettus Research, A National Instruments Company
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Autogenerated file. Do not modify.
+// $ ./gen_node_to_coord_mapping.py --dimsize 16 --pattern spiral + +parameter CLOG2_DIM_SIZE = $clog2(DIM_SIZE); //Vivado workaround + + +// DIM_SIZE = 2, PATTERN = SPIRAL +//------------------------------------ +// 0 1 +// 3 2 +localparam [3:0] XCOORD_DIM_002 = {1'd0,1'd1,1'd1,1'd0}; +localparam [3:0] YCOORD_DIM_002 = {1'd1,1'd1,1'd0,1'd0}; + +// DIM_SIZE = 3, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 +// 5 0 1 +// 4 3 2 +localparam [17:0] XCOORD_DIM_003 = {2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [17:0] YCOORD_DIM_003 = {2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 4, PATTERN = SPIRAL +//------------------------------------ +// 6 7 8 9 +// 5 0 1 10 +// 4 3 2 11 +// 15 14 13 12 +localparam [31:0] XCOORD_DIM_004 = {2'd0,2'd1,2'd2,2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd1}; +localparam [31:0] YCOORD_DIM_004 = {2'd3,2'd3,2'd3,2'd3,2'd2,2'd1,2'd0,2'd0,2'd0,2'd0,2'd1,2'd2,2'd2,2'd2,2'd1,2'd1}; + +// DIM_SIZE = 5, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 +// 19 6 7 8 9 +// 18 5 0 1 10 +// 17 4 3 2 11 +// 16 15 14 13 12 +localparam [74:0] XCOORD_DIM_005 = {3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [74:0] YCOORD_DIM_005 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 6, PATTERN = SPIRAL +//------------------------------------ +// 20 21 22 23 24 25 +// 19 6 7 8 9 26 +// 18 5 0 1 10 27 +// 17 4 3 2 11 28 +// 16 15 14 13 12 29 +// 35 34 33 32 31 30 +localparam [107:0] XCOORD_DIM_006 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd2}; +localparam [107:0] YCOORD_DIM_006 = 
{3'd5,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd4,3'd4,3'd4,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd3,3'd3,3'd2,3'd2}; + +// DIM_SIZE = 7, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 +// 41 20 21 22 23 24 25 +// 40 19 6 7 8 9 26 +// 39 18 5 0 1 10 27 +// 38 17 4 3 2 11 28 +// 37 16 15 14 13 12 29 +// 36 35 34 33 32 31 30 +localparam [146:0] XCOORD_DIM_007 = {3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [146:0] YCOORD_DIM_007 = {3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 8, PATTERN = SPIRAL +//------------------------------------ +// 42 43 44 45 46 47 48 49 +// 41 20 21 22 23 24 25 50 +// 40 19 6 7 8 9 26 51 +// 39 18 5 0 1 10 27 52 +// 38 17 4 3 2 11 28 53 +// 37 16 15 14 13 12 29 54 +// 36 35 34 33 32 31 30 55 +// 63 62 61 60 59 58 57 56 +localparam [191:0] XCOORD_DIM_008 = {3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd3}; +localparam [191:0] YCOORD_DIM_008 = 
{3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd7,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd0,3'd1,3'd2,3'd3,3'd4,3'd5,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd6,3'd5,3'd4,3'd3,3'd2,3'd1,3'd1,3'd1,3'd1,3'd1,3'd1,3'd2,3'd3,3'd4,3'd5,3'd5,3'd5,3'd5,3'd5,3'd4,3'd3,3'd2,3'd2,3'd2,3'd2,3'd3,3'd4,3'd4,3'd4,3'd3,3'd3}; + +// DIM_SIZE = 9, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 +// 71 42 43 44 45 46 47 48 49 +// 70 41 20 21 22 23 24 25 50 +// 69 40 19 6 7 8 9 26 51 +// 68 39 18 5 0 1 10 27 52 +// 67 38 17 4 3 2 11 28 53 +// 66 37 16 15 14 13 12 29 54 +// 65 36 35 34 33 32 31 30 55 +// 64 63 62 61 60 59 58 57 56 +localparam [323:0] XCOORD_DIM_009 = {4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [323:0] YCOORD_DIM_009 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 10, PATTERN = SPIRAL +//------------------------------------ +// 72 73 74 75 76 77 78 79 80 81 +// 71 42 43 44 45 46 47 48 49 82 +// 70 41 20 21 22 23 24 25 50 83 +// 69 40 19 6 7 8 9 26 51 84 +// 68 39 18 5 0 1 10 27 52 85 +// 67 38 17 4 3 2 11 28 53 86 +// 66 37 16 15 14 13 12 29 54 87 +// 65 36 35 34 33 32 31 30 55 88 +// 64 63 62 61 60 59 58 57 56 89 +// 99 98 97 96 95 94 93 92 91 90 +localparam [399:0] 
XCOORD_DIM_010 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd4}; +localparam [399:0] YCOORD_DIM_010 = {4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd6,4'd6,4'd6,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd5,4'd5,4'd4,4'd4}; + +// DIM_SIZE = 11, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 +// 109 72 73 74 75 76 77 78 79 80 81 +// 108 71 42 43 44 45 46 47 48 49 82 +// 107 70 41 20 21 22 23 24 25 50 83 +// 106 69 40 19 6 7 8 9 26 51 84 +// 105 68 39 18 5 0 1 10 27 52 85 +// 104 67 38 17 4 3 2 11 28 53 86 +// 103 66 37 16 15 14 13 12 29 54 87 +// 102 65 36 35 34 33 32 31 30 55 88 +// 101 64 63 62 61 60 59 58 57 56 89 +// 100 99 98 97 96 95 94 93 92 91 90 +localparam [483:0] XCOORD_DIM_011 = 
{4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [483:0] YCOORD_DIM_011 = {4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 12, PATTERN = SPIRAL +//------------------------------------ +// 110 111 112 113 114 115 116 117 118 119 120 121 +// 109 72 73 74 75 76 77 78 79 80 81 122 +// 108 71 42 43 44 45 46 47 48 49 82 123 +// 107 70 41 20 21 22 23 24 25 50 83 124 +// 106 69 40 19 6 7 8 9 26 51 84 125 +// 105 68 39 18 5 0 1 10 27 52 85 126 +// 104 67 38 17 4 3 2 11 28 53 86 127 +// 103 66 37 16 15 14 13 12 29 54 87 128 +// 102 65 36 35 34 33 32 31 30 55 88 129 +// 101 64 63 62 61 60 59 58 57 56 89 130 +// 100 99 98 97 96 95 94 93 92 91 90 131 +// 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [575:0] XCOORD_DIM_012 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd5}; +localparam [575:0] YCOORD_DIM_012 = {4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd7,4'd7,4'd7,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd6,4'd6,4'd5,4'd5}; + +// DIM_SIZE = 13, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 +// 148 103 
66 37 16 15 14 13 12 29 54 87 128 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 +localparam [675:0] XCOORD_DIM_013 = {4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [675:0] YCOORD_DIM_013 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 14, PATTERN = SPIRAL +//------------------------------------ +// 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [783:0] XCOORD_DIM_014 = 
{4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd6}; +localparam [783:0] YCOORD_DIM_014 = 
{4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd8,4'd8,4'd8,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd7,4'd7,4'd6,4'd6}; + +// DIM_SIZE = 15, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 +// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 +localparam [899:0] 
XCOORD_DIM_015 = {4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [899:0] YCOORD_DIM_015 = 
{4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + +// DIM_SIZE = 16, PATTERN = SPIRAL +//------------------------------------ +// 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 +// 209 156 157 158 159 160 161 162 163 164 165 166 167 168 169 226 +// 208 155 110 111 112 113 114 115 116 117 118 119 120 121 170 227 +// 207 154 109 72 73 74 75 76 77 78 79 80 81 122 171 228 +// 206 153 108 71 42 43 44 45 46 47 48 49 82 123 172 229 +// 205 152 107 70 41 20 21 22 23 24 25 50 83 124 173 230 +// 204 151 106 69 40 19 6 7 8 9 26 51 84 125 174 231 +// 203 150 105 68 39 18 5 0 1 10 27 52 85 126 175 232 +// 202 149 104 67 38 17 4 3 2 11 28 53 86 127 176 233 +// 201 148 103 66 37 16 15 14 13 12 29 54 87 128 177 234 +// 200 147 102 65 36 35 34 33 32 31 30 55 88 129 178 235 +// 199 146 101 64 63 62 61 60 59 58 57 56 89 130 179 236 
+// 198 145 100 99 98 97 96 95 94 93 92 91 90 131 180 237 +// 197 144 143 142 141 140 139 138 137 136 135 134 133 132 181 238 +// 196 195 194 193 192 191 190 189 188 187 186 185 184 183 182 239 +// 255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 +localparam [1023:0] XCOORD_DIM_016 = {4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd7}; +localparam [1023:0] YCOORD_DIM_016 = 
{4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd15,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd0,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd14,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd1,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd13,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd2,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd12,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd3,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd11,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd4,4'd5,4'd6,4'd7,4'd8,4'd9,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd10,4'd9,4'd8,4'd7,4'd6,4'd5,4'd5,4'd5,4'd5,4'd5,4'd5,4'd6,4'd7,4'd8,4'd9,4'd9,4'd9,4'd9,4'd9,4'd8,4'd7,4'd6,4'd6,4'd6,4'd6,4'd7,4'd8,4'd8,4'd8,4'd7,4'd7}; + + +function [CLOG2_DIM_SIZE-1:0] node_to_xdst; + input [WIDTH-1:0] header; +begin + if (DIM_SIZE == 2) + node_to_xdst = XCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_xdst = XCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_xdst = XCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_xdst = XCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_xdst = XCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_xdst = XCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_xdst = 
XCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_xdst = XCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_xdst = XCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_xdst = XCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_xdst = XCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_xdst = XCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_xdst = XCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_xdst = XCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_xdst = XCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_xdst = {CLOG2_DIM_SIZE{1'd0}}; +end endfunction + +function [CLOG2_DIM_SIZE-1:0] node_to_ydst; + input [WIDTH-1:0] header; +begin + if (DIM_SIZE == 2) + node_to_ydst = YCOORD_DIM_002[1*header[1:0] +: 1]; + else if (DIM_SIZE == 3) + node_to_ydst = YCOORD_DIM_003[2*header[3:0] +: 2]; + else if (DIM_SIZE == 4) + node_to_ydst = YCOORD_DIM_004[2*header[3:0] +: 2]; + else if (DIM_SIZE == 5) + node_to_ydst = YCOORD_DIM_005[3*header[4:0] +: 3]; + else if (DIM_SIZE == 6) + node_to_ydst = YCOORD_DIM_006[3*header[5:0] +: 3]; + else if (DIM_SIZE == 7) + node_to_ydst = YCOORD_DIM_007[3*header[5:0] +: 3]; + else if (DIM_SIZE == 8) + node_to_ydst = YCOORD_DIM_008[3*header[5:0] +: 3]; + else if (DIM_SIZE == 9) + node_to_ydst = YCOORD_DIM_009[4*header[6:0] +: 4]; + else if (DIM_SIZE == 10) + node_to_ydst = YCOORD_DIM_010[4*header[6:0] +: 4]; + else if (DIM_SIZE == 11) + node_to_ydst = YCOORD_DIM_011[4*header[6:0] +: 4]; + else if (DIM_SIZE == 12) + node_to_ydst = YCOORD_DIM_012[4*header[7:0] +: 4]; + else if (DIM_SIZE == 13) + node_to_ydst = YCOORD_DIM_013[4*header[7:0] +: 4]; + else if (DIM_SIZE == 14) + node_to_ydst = YCOORD_DIM_014[4*header[7:0] +: 4]; + else if (DIM_SIZE == 15) + node_to_ydst = YCOORD_DIM_015[4*header[7:0] +: 4]; + else if (DIM_SIZE == 16) + node_to_ydst = 
YCOORD_DIM_016[4*header[7:0] +: 4]; + else + node_to_ydst = {CLOG2_DIM_SIZE{1'd0}}; +end endfunction + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl new file mode 100644 index 000000000..39440b512 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +create_project tmp_proj -part xc7k410tffg900-3 -in_memory +add_files {axis_ctrl_crossbar_nxn_top.v ../axis_ctrl_crossbar_nxn.v ../axis_ctrl_crossbar_2d_mesh.v ../mesh_2d_dor_router_multi_sw.v ../axis_switch.v ../axis_ingress_vc_buff.v ../mesh_node_mapping.vh ../mesh_2d_dor_router_single_sw.v ../torus_2d_dor_router_single_sw.v ../torus_2d_dor_router_multi_sw.v ../axis_port_terminator.v} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top axis_ctrl_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file axis_ctrl_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file axis_ctrl_crossbar_nxn.rpt +write_checkpoint -force axis_ctrl_crossbar_nxn.dcp +close_project +exit
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in new file mode 100644 index 000000000..6805100b9 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/axis_ctrl_crossbar_nxn_top.v.in @@ -0,0 +1,47 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module axis_ctrl_crossbar_nxn_top( + input clk, + input rst +); + // Router global config + localparam IMPL = "{top}"; + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam ROUTING = "{ralloc}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + (* dont_touch = "true"*) wire deadlock_detected; + + axis_ctrl_crossbar_nxn #( + .WIDTH (DWIDTH), + .NPORTS (NPORTS), + .TOPOLOGY (IMPL), + .INGRESS_BUFF_SIZE(MTU), + .ROUTER_BUFF_SIZE (MTU), + .ROUTING_ALLOC (ROUTING), + .SWITCH_ALLOC ("ROUND-ROBIN") + ) router_dut_i ( + .clk (clk), + .reset (rst), + .s_axis_tdata (s_axis_tdata ), + .s_axis_tlast (s_axis_tlast ), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + .m_axis_tdata (m_axis_tdata ), + .m_axis_tlast (m_axis_tlast ), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + .deadlock_detected(deadlock_detected) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl new file mode 100644 index 000000000..304384aee --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.tcl @@ -0,0 +1,18 @@ +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National 
Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +create_project tmp_proj -part xc7k410tffg900-3 -in_memory +add_files {chdr_crossbar_nxn_top.v ../chdr_crossbar_nxn.v ../axis_switch.v ../chdr_xb_ingress_buff.v ../chdr_xb_routing_table.v ../../core/chdr_mgmt_pkt_handler.v ../../core/rfnoc_chdr_utils.vh ../../core/rfnoc_chdr_internal_utils.vh} +add_files {../../../fifo/axi_fifo_flop.v ../../../fifo/axi_fifo_flop2.v ../../../fifo/axi_fifo.v ../../../fifo/axi_mux_select.v ../../../fifo/axi_fifo_bram.v ../../../fifo/axi_fifo_cascade.v ../../../fifo/axi_mux.v ../../../fifo/axi_fifo_short.v ../../../fifo/axi_demux.v ../../../fifo/axi_packet_gate.v ../../../control/map/cam_priority_encoder.v ../../../control/map/cam_srl.v ../../../control/map/cam_bram.v ../../../control/map/cam.v ../../../control/map/kv_map.v ../../../control/map/axis_muxed_kv_map.v ../../../control/ram_2port.v} +set_property top chdr_crossbar_nxn_top [current_fileset] +synth_design +create_clock -name clk -period 2.0 [get_ports clk] +report_utilization -no_primitives -file chdr_crossbar_nxn.rpt +report_timing_summary -setup -no_detailed_paths -no_header -datasheet -append -file chdr_crossbar_nxn.rpt +write_checkpoint -force chdr_crossbar_nxn.dcp +close_project +exit
\ No newline at end of file diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in new file mode 100644 index 000000000..fbf0852a3 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/chdr_crossbar_nxn_top.v.in @@ -0,0 +1,55 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +module chdr_crossbar_nxn_top( + input clk, + input rst +); + // Router global config + localparam NPORTS = {ports}; + localparam DWIDTH = {dataw}; + localparam MTU = {mtu}; + localparam RLUT_SIZE = {rlutsize}; + localparam OPTIMIZE = "{opt}"; + + (* dont_touch = "true"*) wire [(DWIDTH*NPORTS)-1:0] s_axis_tdata , m_axis_tdata ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tlast , m_axis_tlast ; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tvalid, m_axis_tvalid; + (* dont_touch = "true"*) wire [NPORTS-1:0] s_axis_tready, m_axis_tready; + + chdr_crossbar_nxn #( + .CHDR_W (DWIDTH), + .NPORTS (NPORTS), + .DEFAULT_PORT (0), + .MTU (MTU), + .ROUTE_TBL_SIZE (RLUT_SIZE), + .MUX_ALLOC ("ROUND-ROBIN"), + .OPTIMIZE (OPTIMIZE), + .NPORTS_MGMT (NPORTS), + .EXT_RTCFG_PORT (1) + ) router_dut_i ( + // General + .clk (clk), + .reset (rst), + // Inputs + .s_axis_tdata (s_axis_tdata), + .s_axis_tlast (s_axis_tlast), + .s_axis_tvalid (s_axis_tvalid), + .s_axis_tready (s_axis_tready), + // Output + .m_axis_tdata (m_axis_tdata), + .m_axis_tlast (m_axis_tlast), + .m_axis_tvalid (m_axis_tvalid), + .m_axis_tready (m_axis_tready), + // External rtcfg port + .ext_rtcfg_stb (0), + .ext_rtcfg_addr (0), + .ext_rtcfg_data (0), + .ext_rtcfg_ack () + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py new file mode 100755 index 000000000..4ca6e07fa --- /dev/null +++ 
b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_axis_ctrl_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! /usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'axis_ctrl_crossbar_nxn' + +# Parse command line options +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--top', type=str, default='TORUS', help='Topologies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='32', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='5', help='MTU (CSV)') + parser.add_argument('--ralloc', type=str, default='WORMHOLE', help='Router allocation method (CSV)') + return parser.parse_args() + +def main(): + args = get_options() + keys = ['top', 'ports', 'dataw', 'mtu', 'ralloc'] + for top in args.top.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for ralloc in args.ralloc.strip().split(','): + # Collect parameters + transform = {'ports':ports, 'dataw':dataw, 'mtu':mtu, 'top':top, 'ralloc':ralloc} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py new file mode 100755 index 000000000..668e7a247 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_chdr_crossbar_nxn.py @@ -0,0 +1,37 @@ +#! 
/usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import argparse +import synth_run + +modname = 'chdr_crossbar_nxn' + +# Parse command line options +def get_options(): + parser = argparse.ArgumentParser(description='Generate synthesis results for ' + modname) + parser.add_argument('--opt', type=str, default='AREA', help='Optimization strategies (CSV)') + parser.add_argument('--ports', type=str, default='8', help='Number of ports (CSV)') + parser.add_argument('--dataw', type=str, default='64', help='Router datapath width (CSV)') + parser.add_argument('--mtu', type=str, default='10', help='MTU or Ingress buffer size (CSV)') + parser.add_argument('--rlutsize', type=str, default='6', help='Router lookup table size (CSV)') + return parser.parse_args() + +def main(): + args = get_options() + keys = ['opt', 'ports', 'dataw', 'mtu', 'rlutsize'] + for opt in args.opt.strip().split(','): + for ports in args.ports.strip().split(','): + for dataw in args.dataw.strip().split(','): + for mtu in args.mtu.strip().split(','): + for rlutsize in args.rlutsize.strip().split(','): + # Collect parameters + transform = {'opt':opt, 'ports':ports, 'dataw':dataw, 'mtu':mtu, 'rlutsize':rlutsize} + synth_run.synth_run(modname, keys, transform) + +if __name__ == '__main__': + main() diff --git a/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py new file mode 100644 index 000000000..a9801ac20 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/synth/synth_run.py @@ -0,0 +1,67 @@ +#! 
/usr/bin/python3 +#!/usr/bin/python3 +# +# Copyright 2018 Ettus Research, a National Instruments Company +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +import sys, os +import subprocess +import re + +def synth_run(modname, keys, transform): + prefix = modname + '_' + ('_'.join(['%s%s'%(k,transform[k]) for k in keys])) + print('='*(len(prefix)+2)) + print(' %s '%(prefix)) + print('='*(len(prefix)+2)) + # Write Verilog top-level file + with open(modname + '_top.v.in', 'r') as in_file: + with open(modname + '_top.v', 'w') as out_file: + out_file.write(in_file.read().format(**transform)) + # Run Vivado + exitcode = subprocess.Popen( + 'vivado -mode tcl -source %s_top.tcl -nolog -nojou'%(modname), shell=True + ).wait() + if exitcode != 0: + raise RuntimeError('Error running vivado. Was setupenv.sh run?') + # Extract info + lut = 100.0 + reg = 100.0 + bram = 100.0 + dsp = 100.0 + fmax = 0.0 + with open(modname + '.rpt', 'r') as rpt_file: + rpt = rpt_file.readlines() + for line in rpt: + lm = re.match(r'.*Slice LUTs\*.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if lm is not None: + lut = float(lm.group(1).strip()) + rm = re.match(r'.*Slice Registers.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if rm is not None: + reg = float(rm.group(1).strip()) + bm = re.match(r'.*Block RAM Tile.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if bm is not None: + bram = float(bm.group(1).strip()) + dm = re.match(r'.*DSPs.*\|(.*)\|(.*)\|(.*)\|(.*)\|.*', line) + if dm is not None: + dsp = float(dm.group(1).strip()) + tm = re.match(r'.*clk.*\| clk\s*\|(.*)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|', line) + if tm is not None: + fmax = 1000.0/float(tm.group(1).strip()) + # Save report + os.rename(modname + '.rpt', prefix + '.rpt') + os.rename(modname + '.dcp', prefix + '.dcp') + try: + os.remove(modname + '_top.v') + os.remove('fsm_encoding.os') + except FileNotFoundError: + pass + # Write summary report line + res_keys = ['lut','reg','bram','dsp','fmax'] + res = {'lut':lut, 'reg':reg, 'bram':bram, 'dsp':dsp, 
'fmax':fmax, 'prefix':prefix} + if not os.path.exists(modname + '_summary.csv'): + with open(modname + '_summary.csv', 'w') as summaryf: + summaryf.write((','.join(keys + res_keys)) + '\n') + with open(modname + '_summary.csv', 'a') as summaryf: + summaryf.write((','.join(['%s'%(transform[k]) for k in keys])) + ',' + (','.join(['%.1f'%(res[k]) for k in res_keys])) + '\n') diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v new file mode 100644 index 000000000..cd70450a0 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_multi_sw.v @@ -0,0 +1,338 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: torus_2d_dor_router_multi_sw +// Description: +// Alternate implementation for torus_2d_dor_router_single_sw with +// multiple switches for independent paths between inputs and outputs +// **NOTE**: This module has not been validated + +module torus_2d_dor_router_multi_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE" +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_term_tdata, + input wire s_axis_term_tlast, + input wire s_axis_term_tvalid, + output wire s_axis_term_tready, + output wire [WIDTH-1:0] m_axis_term_tdata, + output wire m_axis_term_tlast, + output wire m_axis_term_tvalid, + input wire m_axis_term_tready, + + // X-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_xdim_tdata, + input wire [0:0] s_axis_xdim_tdest, + input wire s_axis_xdim_tlast, + input wire s_axis_xdim_tvalid, + output wire s_axis_xdim_tready, + output wire [WIDTH-1:0] m_axis_xdim_tdata, + output wire [0:0] 
m_axis_xdim_tdest, + output wire m_axis_xdim_tlast, + output wire m_axis_xdim_tvalid, + input wire m_axis_xdim_tready, + + // Y-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_ydim_tdata, + input wire [0:0] s_axis_ydim_tdest, + input wire s_axis_ydim_tlast, + input wire s_axis_ydim_tvalid, + output wire s_axis_ydim_tready, + output wire [WIDTH-1:0] m_axis_ydim_tdata, + output wire [0:0] m_axis_ydim_tdest, + output wire m_axis_ydim_tlast, + output wire m_axis_ydim_tvalid, + input wire m_axis_ydim_tready +); + + // ------------------------------------------------- + // Routing functions + // ------------------------------------------------- + `include "mesh_node_mapping.vh" + + function [2:0] term_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + term_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + term_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out */}; + end else begin + term_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [2:0] xdim_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst, xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdst == XB_ADDR_X && ydst == XB_ADDR_Y) begin + xdim_in_route = {1'b0 /* VC don't care */, 2'd2 /* term out */}; + end else if (xdst == XB_ADDR_X) begin + xdim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 2'd0 /* ydim out 
*/}; + end else begin + xdim_in_route = {xdiff[$clog2(DIM_SIZE)-1], 2'd1 /* xdim out */}; + end + end + endfunction + + function [1:0] ydim_in_route; + input [WIDTH:0] header; + reg [$clog2(DIM_SIZE)-1:0] ydst, ydiff; + begin + ydst = node_to_ydst(header); + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, LSB is the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (ydst == XB_ADDR_Y) begin + ydim_in_route = {1'b0 /* VC don't care */, 1'd1 /* term out */}; + end else begin + ydim_in_route = {ydiff[$clog2(DIM_SIZE)-1], 1'd0 /* ydim out */}; + end + end + endfunction + + // ------------------------------------------------- + // Input demuxes + // ------------------------------------------------- + wire [WIDTH-1:0] ti_gt_tdata; + wire ti_gt_tdest; + wire ti_gt_tlast; + wire ti_gt_tvalid; + wire ti_gt_tready; + wire [WIDTH-1:0] t2t_tdata, t2x_tdata, t2y_tdata; + wire t2t_tdest, t2x_tdest, t2y_tdest; + wire t2t_tlast, t2x_tlast, t2y_tlast; + wire t2t_tvalid, t2x_tvalid, t2y_tvalid; + wire t2t_tready, t2x_tready, t2y_tready; + wire [WIDTH-1:0] term_in_hdr; + wire [1:0] term_in_port; + + assign {ti_gt_tdest, term_in_port} = term_in_route(term_in_hdr); + + axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_term_tdata), + .i_tlast (s_axis_term_tlast), + .i_tvalid (s_axis_term_tvalid), + .i_tready (s_axis_term_tready), + .i_terror (1'b0), + .o_tdata (ti_gt_tdata), + .o_tlast (ti_gt_tlast), + .o_tvalid (ti_gt_tvalid), + .o_tready (ti_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) term_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (term_in_hdr), + .dest (term_in_port), + .i_tdata ({ti_gt_tdest, ti_gt_tdata}), + .i_tlast (ti_gt_tlast), + .i_tvalid (ti_gt_tvalid), + .i_tready (ti_gt_tready), + .o_tdata ({t2t_tdest, t2t_tdata, 
t2x_tdest, t2x_tdata, t2y_tdest, t2y_tdata}), + .o_tlast ({t2t_tlast, t2x_tlast, t2y_tlast}), + .o_tvalid ({t2t_tvalid, t2x_tvalid, t2y_tvalid}), + .o_tready ({t2t_tready, t2x_tready, t2y_tready}) + ); + + wire [WIDTH-1:0] xi_gt_tdata; + wire xi_gt_tdest; + wire xi_gt_tlast; + wire xi_gt_tvalid; + wire xi_gt_tready; + wire [WIDTH-1:0] x2t_tdata, x2x_tdata, x2y_tdata; + wire x2t_tdest, x2x_tdest, x2y_tdest; + wire x2t_tlast, x2x_tlast, x2y_tlast; + wire x2t_tvalid, x2x_tvalid, x2y_tvalid; + wire x2t_tready, x2x_tready, x2y_tready; + wire [WIDTH-1:0] xdim_in_hdr; + wire [1:0] xdim_in_port; + + assign {xi_gt_tdest, xdim_in_port} = xdim_in_route(xdim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) xdim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_xdim_tdata), + .s_axis_tdest (s_axis_xdim_tdest), + .s_axis_tlast (s_axis_xdim_tlast), + .s_axis_tvalid (s_axis_xdim_tvalid), + .s_axis_tready (s_axis_xdim_tready), + .m_axis_tdata (xi_gt_tdata), + .m_axis_tlast (xi_gt_tlast), + .m_axis_tvalid (xi_gt_tvalid), + .m_axis_tready (xi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) xdim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (xdim_in_hdr), + .dest (xdim_in_port), + .i_tdata ({xi_gt_tdest, xi_gt_tdata}), + .i_tlast (xi_gt_tlast), + .i_tvalid (xi_gt_tvalid), + .i_tready (xi_gt_tready), + .o_tdata ({x2t_tdest, x2t_tdata, x2x_tdest, x2x_tdata, x2y_tdest, x2y_tdata}), + .o_tlast ({x2t_tlast, x2x_tlast, x2y_tlast}), + .o_tvalid ({x2t_tvalid, x2x_tvalid, x2y_tvalid}), + .o_tready ({x2t_tready, x2x_tready, x2y_tready}) + ); + + wire [WIDTH-1:0] yi_gt_tdata; + wire yi_gt_tdest; + wire yi_gt_tlast; + wire yi_gt_tvalid; + wire yi_gt_tready; + wire [WIDTH-1:0] y2t_tdata, y2y_tdata; + wire y2t_tdest, y2y_tdest; + wire y2t_tlast, y2y_tlast; + wire y2t_tvalid, y2y_tvalid; + wire y2t_tready, 
y2y_tready; + wire [WIDTH-1:0] ydim_in_hdr; + wire [0:0] ydim_in_port; + + assign {yi_gt_tdest, ydim_in_port} = ydim_in_route(ydim_in_hdr); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) ydim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_ydim_tdata ), + .s_axis_tdest (s_axis_ydim_tdest ), + .s_axis_tlast (s_axis_ydim_tlast ), + .s_axis_tvalid (s_axis_ydim_tvalid), + .s_axis_tready (s_axis_ydim_tready), + .m_axis_tdata (yi_gt_tdata ), + .m_axis_tlast (yi_gt_tlast ), + .m_axis_tvalid (yi_gt_tvalid), + .m_axis_tready (yi_gt_tready) + ); + + axi_demux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0 /* must be 0 */), .POST_FIFO_SIZE(0) + ) ydim_in_demux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .header (ydim_in_hdr), + .dest (ydim_in_port), + .i_tdata ({yi_gt_tdest, yi_gt_tdata}), + .i_tlast (yi_gt_tlast), + .i_tvalid (yi_gt_tvalid), + .i_tready (yi_gt_tready), + .o_tdata ({y2t_tdest, y2t_tdata, y2y_tdest, y2y_tdata}), + .o_tlast ({y2t_tlast, y2y_tlast}), + .o_tvalid ({y2t_tvalid, y2y_tvalid}), + .o_tready ({y2t_tready, y2y_tready}) + ); + + // ------------------------------------------------- + // Output muxes + // ------------------------------------------------- + wire term_tdest_discard; + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) term_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2t_tdest, t2t_tdata, x2t_tdest, x2t_tdata, y2t_tdest, y2t_tdata}), + .i_tlast ({t2t_tlast, x2t_tlast, y2t_tlast }), + .i_tvalid ({t2t_tvalid, x2t_tvalid, y2t_tvalid}), + .i_tready ({t2t_tready, x2t_tready, y2t_tready}), + .o_tdata ({term_tdest_discard, m_axis_term_tdata}), + .o_tlast (m_axis_term_tlast), + .o_tvalid (m_axis_term_tvalid), + .o_tready (m_axis_term_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(2), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) xdim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear 
(1'b0), + .i_tdata ({t2x_tdest, t2x_tdata, x2x_tdest, x2x_tdata}), + .i_tlast ({t2x_tlast, x2x_tlast}), + .i_tvalid ({t2x_tvalid, x2x_tvalid}), + .i_tready ({t2x_tready, x2x_tready}), + .o_tdata ({m_axis_xdim_tdest, m_axis_xdim_tdata}), + .o_tlast (m_axis_xdim_tlast ), + .o_tvalid (m_axis_xdim_tvalid), + .o_tready (m_axis_xdim_tready) + ); + + axi_mux #( + .WIDTH(WIDTH+1), .SIZE(3), + .PRE_FIFO_SIZE(0), .POST_FIFO_SIZE(1) + ) ydim_out_mux_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata ({t2y_tdest, t2y_tdata, x2y_tdest, x2y_tdata, y2y_tdest, y2y_tdata}), + .i_tlast ({t2y_tlast, x2y_tlast, y2y_tlast }), + .i_tvalid ({t2y_tvalid, x2y_tvalid, y2y_tvalid}), + .i_tready ({t2y_tready, x2y_tready, y2y_tready}), + .o_tdata ({m_axis_ydim_tdest, m_axis_ydim_tdata}), + .o_tlast (m_axis_ydim_tlast), + .o_tvalid (m_axis_ydim_tvalid), + .o_tready (m_axis_ydim_tready) + ); + +endmodule + diff --git a/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v new file mode 100644 index 000000000..21a66782d --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/crossbar/torus_2d_dor_router_single_sw.v @@ -0,0 +1,294 @@ +// +// Copyright 2018 Ettus Research, A National Instruments Company +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: torus_2d_dor_router_single_sw +// Description: +// This module implements the router for a 2-dimensional (2d) +// torus network that uses dimension order routing (dor) and has a +// single underlying switch (single_sw). It uses AXI-Stream for all of its +// links. +// The torus topology, routing algorithms and the router architecture are +// described in README.md in this directory. 
+// Parameters: +// - WIDTH: Width of the AXI-Stream data bus +// - DIM_SIZE: Number of routers along one dimension +// - XB_ADDR_X: The X-coordinate of this router in the topology +// - XB_ADDR_Y: The Y-coordinate of this router in the topology +// - TERM_BUFF_SIZE: log2 of the ingress terminal buffer size (in words) +// - XB_BUFF_SIZE: log2 of the ingress inter-router buffer size (in words) +// - ROUTING_ALLOC: Algorithm to allocate routing paths between routers. +// * WORMHOLE: Allocate route as soon as first word in pkt arrives +// * CUT-THROUGH: Allocate route only after the full pkt arrives +// - SWITCH_ALLOC: Algorithm to allocate the switch +// * PRIO: Priority based. Priority: Y-dim > X-dim > Term +// * ROUND-ROBIN: Round robin input port allocation +// Signals: +// - *_axis_term_*: Terminal ports (master/slave) +// - *_axis_xdim_*: Inter-router X-dim connections (master/slave) +// - *_axis_ydim_*: Inter-router Y-dim connections (master/slave) +// + +module torus_2d_dor_router_single_sw #( + parameter WIDTH = 64, + parameter DIM_SIZE = 4, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_X = 0, + parameter [$clog2(DIM_SIZE)-1:0] XB_ADDR_Y = 0, + parameter TERM_BUFF_SIZE = 5, + parameter XB_BUFF_SIZE = 5, + parameter ROUTING_ALLOC = "WORMHOLE", + parameter SWITCH_ALLOC = "PRIO" +) ( + // Clocks and resets + input wire clk, + input wire reset, + + // Terminal connections + input wire [WIDTH-1:0] s_axis_term_tdata, + input wire s_axis_term_tlast, + input wire s_axis_term_tvalid, + output wire s_axis_term_tready, + output wire [WIDTH-1:0] m_axis_term_tdata, + output wire m_axis_term_tlast, + output wire m_axis_term_tvalid, + input wire m_axis_term_tready, + + // X-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_xdim_tdata, + input wire [0:0] s_axis_xdim_tdest, + input wire s_axis_xdim_tlast, + input wire s_axis_xdim_tvalid, + output wire s_axis_xdim_tready, + output wire [WIDTH-1:0] m_axis_xdim_tdata, + output wire [0:0] m_axis_xdim_tdest, + output 
wire m_axis_xdim_tlast, + output wire m_axis_xdim_tvalid, + input wire m_axis_xdim_tready, + + // Y-dimension inter-XB connections + input wire [WIDTH-1:0] s_axis_ydim_tdata, + input wire [0:0] s_axis_ydim_tdest, + input wire s_axis_ydim_tlast, + input wire s_axis_ydim_tvalid, + output wire s_axis_ydim_tready, + output wire [WIDTH-1:0] m_axis_ydim_tdata, + output wire [0:0] m_axis_ydim_tdest, + output wire m_axis_ydim_tlast, + output wire m_axis_ydim_tvalid, + input wire m_axis_ydim_tready +); + + //------------------------------------------------- + // Routing and switch allocation functions + //------------------------------------------------- + + // mesh_node_mapping.vh file contains the mapping between the node number + // and its XY coordinates. It is autogenerated and defines the node_to_xdst + // and node_to_ydst functions. + `include "mesh_node_mapping.vh" + + localparam [1:0] SW_DEST_TERM = 2'd0; + localparam [1:0] SW_DEST_XDIM = 2'd1; + localparam [1:0] SW_DEST_YDIM = 2'd2; + localparam [1:0] SW_NUM_DESTS = 2'd3; + + // The compute_switch_tdest function is the destination selector + // i.e. it will inspect the bottom $clog2(DIM_SIZE)*2 bits of the + // first word of a packet and determine the destination of the packet. 
+ function [2:0] compute_switch_tdest; + input [WIDTH-1:0] header; + reg [$clog2(DIM_SIZE)-1:0] xdst, ydst; + reg signed [$clog2(DIM_SIZE):0] xdiff, ydiff; + begin + xdst = node_to_xdst(header); + ydst = node_to_ydst(header); + xdiff = xdst - XB_ADDR_X; + ydiff = ydst - XB_ADDR_Y; + // Routing logic + // - MSB is the VC, 2 LSBs are the router destination + // - Long journeys get VC = 1 to bypass local traffic + if (xdiff == 'd0 && ydiff == 'd0) begin + compute_switch_tdest = {1'b0 /* VC don't care */, SW_DEST_TERM}; + end else if (xdiff != 'd0) begin + compute_switch_tdest = {(xdiff < 0), SW_DEST_XDIM}; + end else begin + compute_switch_tdest = {(ydiff < 0), SW_DEST_YDIM}; + end + //$display("xdst=%d, ydst=%d, xaddr=%d, yaddr=%d, dst=%d", xdst, ydst, XB_ADDR_X, XB_ADDR_Y, compute_switch_tdest); + end + endfunction + + // The compute_switch_alloc function is the switch allocation function + // i.e. it chooses which input port reserves the switch for packet transfer. + // After the switch is allocated, all other ports will be backpressured until + // the packet finishes transferring. 
+ function [1:0] compute_switch_alloc; + input [2:0] pkt_waiting; + input [1:0] last_alloc; + begin + if (pkt_waiting == 3'b000) begin + compute_switch_alloc = SW_DEST_TERM; + end else if (pkt_waiting == 3'b001) begin + compute_switch_alloc = SW_DEST_TERM; + end else if (pkt_waiting == 3'b010) begin + compute_switch_alloc = SW_DEST_XDIM; + end else if (pkt_waiting == 3'b100) begin + compute_switch_alloc = SW_DEST_YDIM; + end else begin + if (SWITCH_ALLOC == "PRIO") begin + // Priority: Y-dim > X-dim > Term + if (pkt_waiting[SW_DEST_YDIM]) + compute_switch_alloc = SW_DEST_YDIM; + else if (pkt_waiting[SW_DEST_XDIM]) + compute_switch_alloc = SW_DEST_XDIM; + else + compute_switch_alloc = SW_DEST_TERM; + end else begin + // Round-robin + if (pkt_waiting[(last_alloc + 3'd1) % SW_NUM_DESTS]) + compute_switch_alloc = (last_alloc + 3'd1) % SW_NUM_DESTS; + else if (pkt_waiting[(last_alloc + 3'd2) % SW_NUM_DESTS]) + compute_switch_alloc = (last_alloc + 3'd2) % SW_NUM_DESTS; + else + compute_switch_alloc = last_alloc; + end + end + end + endfunction + + //------------------------------------------------- + // Ingress buffers + //------------------------------------------------- + wire [WIDTH-1:0] ydim_in_data , xdim_in_data , term_in_data ; + wire [2:0] ydim_in_dest , xdim_in_dest , term_in_dest ; + wire ydim_in_last , xdim_in_last , term_in_last ; + wire ydim_in_valid, xdim_in_valid, term_in_valid; + wire ydim_in_ready, xdim_in_ready, term_in_ready; + + // Data coming in from the terminal is gated until a full packet arrives + // in order to minimize the switch allocation time per packet. 
+ axi_packet_gate #( + .WIDTH(WIDTH), .SIZE(TERM_BUFF_SIZE) + ) term_in_pkt_gate_i ( + .clk (clk), + .reset (reset), + .clear (1'b0), + .i_tdata (s_axis_term_tdata), + .i_tlast (s_axis_term_tlast), + .i_tvalid (s_axis_term_tvalid), + .i_tready (s_axis_term_tready), + .i_terror (1'b0), + .o_tdata (term_in_data), + .o_tlast (term_in_last), + .o_tvalid (term_in_valid), + .o_tready (term_in_ready) + ); + assign term_in_dest = compute_switch_tdest(term_in_data); + + // The XY directions have buffers with 2 virtual channels to minimize the + // possibility of a deadlock. + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) xdim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_xdim_tdata), + .s_axis_tdest (s_axis_xdim_tdest), + .s_axis_tlast (s_axis_xdim_tlast), + .s_axis_tvalid (s_axis_xdim_tvalid), + .s_axis_tready (s_axis_xdim_tready), + .m_axis_tdata (xdim_in_data), + .m_axis_tlast (xdim_in_last), + .m_axis_tvalid (xdim_in_valid), + .m_axis_tready (xdim_in_ready) + ); + assign xdim_in_dest = compute_switch_tdest(xdim_in_data); + + axis_ingress_vc_buff #( + .WIDTH(WIDTH), .NUM_VCS(2), + .SIZE(XB_BUFF_SIZE), + .ROUTING(ROUTING_ALLOC) + ) ydim_in_vc_buf_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata (s_axis_ydim_tdata ), + .s_axis_tdest (s_axis_ydim_tdest ), + .s_axis_tlast (s_axis_ydim_tlast ), + .s_axis_tvalid (s_axis_ydim_tvalid), + .s_axis_tready (s_axis_ydim_tready), + .m_axis_tdata (ydim_in_data ), + .m_axis_tlast (ydim_in_last ), + .m_axis_tvalid (ydim_in_valid), + .m_axis_tready (ydim_in_ready) + ); + assign ydim_in_dest = compute_switch_tdest(ydim_in_data); + + //------------------------------------------------- + // Switch + //------------------------------------------------- + + // Track the input packet state + localparam [0:0] PKT_ST_HEAD = 1'b0; + localparam [0:0] PKT_ST_BODY = 1'b1; + reg [0:0] pkt_state = PKT_ST_HEAD; + + // The switch only accepts packets on a single port at 
a time. + wire sw_in_ready = |({ydim_in_ready, xdim_in_ready, term_in_ready}); + wire sw_in_valid = |({ydim_in_valid, xdim_in_valid, term_in_valid}); + wire sw_in_last = |({ydim_in_last&ydim_in_valid, xdim_in_last&xdim_in_valid, term_in_last&term_in_valid}); + + always @(posedge clk) begin + if (reset) begin + pkt_state <= PKT_ST_HEAD; + end else if (sw_in_valid & sw_in_ready) begin + pkt_state <= sw_in_last ? PKT_ST_HEAD : PKT_ST_BODY; + end + end + + // The switch requires the allocation to stay valid until the + // end of the packet. We also might need to keep the previous + // packet's allocation to compute the current one + wire [1:0] switch_alloc; + reg [1:0] prev_switch_alloc = SW_DEST_TERM; + reg [1:0] pkt_switch_alloc = SW_DEST_TERM; + + always @(posedge clk) begin + if (reset) begin + prev_switch_alloc <= SW_DEST_TERM; + pkt_switch_alloc <= SW_DEST_TERM; + end else if (sw_in_valid & sw_in_ready) begin + if (pkt_state == PKT_ST_HEAD) + pkt_switch_alloc <= switch_alloc; + if (sw_in_last) + prev_switch_alloc <= switch_alloc; + end + end + + assign switch_alloc = (sw_in_valid && pkt_state == PKT_ST_HEAD) ? 
+ compute_switch_alloc({ydim_in_valid, xdim_in_valid, term_in_valid}, prev_switch_alloc) : + pkt_switch_alloc; + + wire term_tdest_discard; + axis_switch #( + .DATA_W(WIDTH), .DEST_W(1), .IN_PORTS(3), .OUT_PORTS(3) + ) switch_i ( + .clk (clk), + .reset (reset), + .s_axis_tdata ({ydim_in_data , xdim_in_data , term_in_data }), + .s_axis_tdest ({ydim_in_dest , xdim_in_dest , term_in_dest }), + .s_axis_tlast ({ydim_in_last , xdim_in_last , term_in_last }), + .s_axis_tvalid ({ydim_in_valid, xdim_in_valid, term_in_valid}), + .s_axis_tready ({ydim_in_ready, xdim_in_ready, term_in_ready}), + .s_axis_alloc (switch_alloc), + .m_axis_tdata ({m_axis_ydim_tdata, m_axis_xdim_tdata, m_axis_term_tdata }), + .m_axis_tdest ({m_axis_ydim_tdest, m_axis_xdim_tdest, term_tdest_discard}), + .m_axis_tlast ({m_axis_ydim_tlast, m_axis_xdim_tlast, m_axis_term_tlast }), + .m_axis_tvalid ({m_axis_ydim_tvalid, m_axis_xdim_tvalid, m_axis_term_tvalid}), + .m_axis_tready ({m_axis_ydim_tready, m_axis_xdim_tready, m_axis_term_tready}) + ); + +endmodule + |