aboutsummaryrefslogtreecommitdiffstats
path: root/fpga/usrp3/tools/utils/rfnoc-system-sim
diff options
context:
space:
mode:
Diffstat (limited to 'fpga/usrp3/tools/utils/rfnoc-system-sim')
-rw-r--r--fpga/usrp3/tools/utils/rfnoc-system-sim/.gitignore1
-rw-r--r--fpga/usrp3/tools/utils/rfnoc-system-sim/README6
-rwxr-xr-xfpga/usrp3/tools/utils/rfnoc-system-sim/colosseum_models.py593
-rwxr-xr-xfpga/usrp3/tools/utils/rfnoc-system-sim/ni_hw_models.py261
-rw-r--r--fpga/usrp3/tools/utils/rfnoc-system-sim/rfnocsim.py757
-rwxr-xr-xfpga/usrp3/tools/utils/rfnoc-system-sim/sim_colosseum.py142
6 files changed, 1760 insertions, 0 deletions
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/.gitignore b/fpga/usrp3/tools/utils/rfnoc-system-sim/.gitignore
new file mode 100644
index 000000000..0d20b6487
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/README b/fpga/usrp3/tools/utils/rfnoc-system-sim/README
new file mode 100644
index 000000000..514e9e43b
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/README
@@ -0,0 +1,6 @@
+Dependencies:
+- python2
+- graphviz
+- python-graphviz
+- python-numpy
+- python-matplotlib
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/colosseum_models.py b/fpga/usrp3/tools/utils/rfnoc-system-sim/colosseum_models.py
new file mode 100755
index 000000000..f13b1b194
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/colosseum_models.py
@@ -0,0 +1,593 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Ettus Research
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import rfnocsim
+import math
+import ni_hw_models as hw
+
+class ColGlobals():
+ # Constants used by the Colosseum function models in this file.
+ # "PP" below stands for partial product.
+ BPI = 4 # Number of bytes per sample or coefficient
+ BPP = 1024 # Bytes per packet
+ MIN_SAMP_HOPS = 1 # Minimum number of hops an RX sample will take before it is used to compute a PP
+ MAX_SAMP_HOPS = 3 # Maximum number of hops an RX sample will take before it is used to compute a PP
+ MIN_PP_HOPS = 0 # Minimum number of hops a PP will take before it is used to compute a TX sample
+ MAX_PP_HOPS = 1 # Maximum number of hops a PP will take before it is used to compute a TX sample
+ # Fill fraction assumed for the sample-alignment buffers: the function models size their
+ # buffers as (samples that must be absorbed) / ELASTIC_BUFF_FULLNESS
+ ELASTIC_BUFF_FULLNESS = 0.5
+
+class PartialContribComputer(rfnocsim.Function):
+ """
+ Simulation model for function that computes the contribution of radio chans on other radio chans.
+ This function computes a NxM dot product of FFTs, one bin at a time.
+ Features:
+ - Supports computing the product in multiple cycles (for resource reuse)
+ - Supports deinterleaving data in streams (i.e. if Radio 0+1 data comes in thru the same ethernet)
+
+ Args:
+ sim_core: Simulator core object
+ name: Name of this function
+ size: Number of chans (inputs) for which contribution partial products are computed
+ dst_chans: Computes the contribution of the input chans on these dst_chans
+ items_per_stream: How many channels per stream can this function deinterleave?
+ app_settings: Application settings, indexed by key. Reads 'samp_rate' and 'domain', plus
+ 'fir_taps' and 'fir_dly_line' when the domain is 'time', or 'fft_size' otherwise
+ """
+ def __init__(self, sim_core, name, size, dst_chans, items_per_stream, app_settings):
+ ticks_per_exec = 1 # This function will run once every tick. No multi-cycle paths here.
+ rfnocsim.Function.__init__(self, sim_core, name, size, int(len(dst_chans)/items_per_stream), ticks_per_exec)
+ self.items_per_stream = items_per_stream # Each stream contains data from n radio chans
+ self.dst_chans = dst_chans # Where should the individual products go?
+ # This block has to buffer enough data to ensure
+ # sample alignment. How deep should those buffers be?
+ sync_buff_depth = (((ColGlobals.MAX_SAMP_HOPS - ColGlobals.MIN_SAMP_HOPS) *
+ hw.Bee7Fpga.IO_LN_LATENCY * float(app_settings['samp_rate'])) / ColGlobals.ELASTIC_BUFF_FULLNESS)
+
+ # Adder latency: log2(size/len(dst_chans)) adder stages + 2 pipeline flops
+ latency = math.ceil(math.log(size/len(dst_chans), 2)) + 2
+ # Synchronization latency based on buffer size
+ latency += (sync_buff_depth * ColGlobals.ELASTIC_BUFF_FULLNESS) * (self.get_tick_rate() / float(app_settings['samp_rate']))
+ # Packet alignment latency
+ latency += ColGlobals.BPP * (self.get_tick_rate() / hw.Bee7Fpga.IO_LN_BW)
+ self.estimate_resources(size*items_per_stream, len(dst_chans), app_settings, sync_buff_depth*size, latency)
+
+ def estimate_resources(self, N, M, app_settings, sync_buff_total_samps, pre_filt_latency):
+ """
+ Estimate the DSP and BRAM usage of an NxM computer, then register the resources and
+ the function latency on this object.
+
+ Args:
+ N: Number of input channels (the constructor passes size*items_per_stream)
+ M: Number of destination channels (the constructor passes len(dst_chans))
+ app_settings: Application settings (same object that was given to the constructor)
+ sync_buff_total_samps: Total depth of the sample alignment buffers, in samples
+ pre_filt_latency: Latency accumulated ahead of the filter. The filter latency is added to it
+
+ Raises:
+ SimCompError: In the 'time' domain, if the FIR needs more parallel MACs than MAX_UNROLL_DEPTH
+ """
+ rscrs = rfnocsim.HwRsrcs()
+
+ DSP_BLOCKS_PER_MAC = 3 # DSP blocks for a scaled complex MAC
+ MAX_DSP_RATE = 400e6 # Max clock rate for a DSP48E block
+ MAX_UNROLL_DEPTH = 2 # How many taps (or FFT bins) to compute in parallel?
+ COEFF_SETS = 1 # We need two copies of coefficients one live
+ # and one buffered for dynamic reload. If both
+ # live in BRAM, this should be 2. If the live
+ # set lives in registers, this should be 1
+
+ samp_rate = float(app_settings['samp_rate'])
+ dsp_cyc_per_samp = MAX_DSP_RATE / samp_rate
+
+ if app_settings['domain'] == 'time':
+ fir_taps = app_settings['fir_taps']
+ # A single MAC can be time-shared across all taps if it gets enough cycles per sample
+ if (fir_taps <= dsp_cyc_per_samp):
+ unroll_factor = 1
+ dsp_rate = samp_rate * fir_taps
+ else:
+ unroll_factor = math.ceil((1.0 * fir_taps) / dsp_cyc_per_samp)
+ dsp_rate = MAX_DSP_RATE
+ if (unroll_factor > MAX_UNROLL_DEPTH):
+ raise self.SimCompError('Too many FIR coefficients! Reached loop unroll limit.')
+
+ rscrs.add('DSP', DSP_BLOCKS_PER_MAC * unroll_factor * N * M)
+ rscrs.add('BRAM_18kb', math.ceil(ColGlobals.BPI * app_settings['fir_dly_line'] / hw.Bee7Fpga.BRAM_BYTES) * N * M) # FIR delay line memory
+ rscrs.add('BRAM_18kb', math.ceil(ColGlobals.BPI * COEFF_SETS * fir_taps * unroll_factor * N * M / hw.Bee7Fpga.BRAM_BYTES)) # Coefficient storage
+
+ samp_per_tick = dsp_rate / self.get_tick_rate()
+ self.update_latency(func=pre_filt_latency + (fir_taps / (samp_per_tick * unroll_factor)))
+ else:
+ # Any domain other than 'time' is handled as the FFT-based path
+ fft_size = app_settings['fft_size']
+ rscrs.add('DSP', DSP_BLOCKS_PER_MAC * N * M * MAX_UNROLL_DEPTH) # MACs
+ rscrs.add('BRAM_18kb', math.ceil(ColGlobals.BPI * N * M * fft_size * COEFF_SETS / hw.Bee7Fpga.BRAM_BYTES)) # Coeff storage
+
+ samp_per_tick = MAX_DSP_RATE / self.get_tick_rate()
+ self.update_latency(func=pre_filt_latency + (fft_size / samp_per_tick))
+
+ # Sample alignment buffers are needed in both domains
+ rscrs.add('BRAM_18kb', math.ceil(ColGlobals.BPI * sync_buff_total_samps / hw.Bee7Fpga.BRAM_BYTES))
+ self.update_rsrcs(rscrs)
+
+ def do_func(self, in_data):
+ """
+ Gather FFT data from "size" channels, compute a dot product with the coefficient
+ matrix and spit the partial products out. The dot product is computed for each
+ FFT bin serially.
+
+ Every input stream must carry exactly items_per_stream 'rx' items, otherwise a
+ RuntimeError is raised. Returns a list with one output stream per group of
+ items_per_stream destination channels.
+ """
+ out_data = list()
+ src_chans = []
+ # Iterate over each input
+ for di in in_data:
+ if len(di.items) != self.items_per_stream:
+ raise RuntimeError('Incorrect items per stream. Expecting ' + str(self.items_per_stream))
+ # Deinterleave data
+ for do in range(len(di.items)):
+ (sid, coords) = rfnocsim.DataStream.submatrix_parse(di.items[do])
+ if sid != 'rx':
+ raise RuntimeError('Incorrect items. Expecting radio data (rx) but got ' + sid)
+ src_chans.extend(coords[0])
+ # The output streams take their bpi and count from the first input stream
+ bpi = in_data[0].bpi
+ count = in_data[0].count
+ # Iterate through deinterleaved channels
+ for i in range(0, len(self.dst_chans), self.items_per_stream):
+ items = []
+ for j in range(self.items_per_stream):
+ # Compute partial products:
+ # pp = partial product of "src_chans" on "self.dst_chans[i+j]"
+ items.append(rfnocsim.DataStream.submatrix_gen('pp', [src_chans, self.dst_chans[i+j]]))
+ out_data.append(self.create_outdata_stream(bpi, items, count))
+ return out_data
+
+class PartialContribCombiner(rfnocsim.Function):
+ """
+ Simulation model for function that adds multiple partial contributions (products) into a larger
+ partial product. The combiner can optionally reduce a very large product into a smaller one.
+ Ex: pp[31:0,i] (contribution on chan 0..31 on i) can alias to tx[i] if there are 32 channels.
+
+ Args:
+ sim_core: Simulator core object
+ name: Name of this function
+ radix: Number of partial products that are combined (Number of inputs)
+ app_settings: Application settings, indexed by key. Reads 'samp_rate'
+ reducer_filter: A tuple (contributing chans, alias name). When the combined product covers
+ exactly these contributing chans, it is emitted under the alias name instead
+ of 'pp'. The default (None, None) disables aliasing
+ items_per_stream: How many channels per stream can this function deinterleave?
+ """
+
+ def __init__(self, sim_core, name, radix, app_settings, reducer_filter = (None, None), items_per_stream = 2):
+ rfnocsim.Function.__init__(self, sim_core, name, radix, 1)
+ self.radix = radix
+ self.reducer_filter = reducer_filter
+ self.items_per_stream = items_per_stream
+
+ # This block has to buffer enough data to ensure
+ # sample alignment. How deep should those buffers be?
+ sync_buff_depth = (((ColGlobals.MAX_PP_HOPS - ColGlobals.MIN_PP_HOPS) *
+ hw.Bee7Fpga.IO_LN_LATENCY * float(app_settings['samp_rate'])) / ColGlobals.ELASTIC_BUFF_FULLNESS)
+ # Figure out latency based on sync buffer and delay line
+ latency = math.ceil(math.log(radix, 2)) + 2 # log2(radix) adder stages + 2 pipeline flops
+ # Synchronization latency based on buffer size
+ latency += (sync_buff_depth * ColGlobals.ELASTIC_BUFF_FULLNESS) * (self.get_tick_rate() / float(app_settings['samp_rate']))
+ # Packet alignment latency
+ latency += ColGlobals.BPP * (self.get_tick_rate() / hw.Bee7Fpga.IO_LN_BW)
+
+ self.update_latency(func=latency)
+ self.estimate_resources(radix, sync_buff_depth)
+
+ def estimate_resources(self, radix, sync_buff_depth):
+ """
+ Register the BRAM needed to buffer sync_buff_depth samples on each of the radix inputs
+ """
+ rscrs = rfnocsim.HwRsrcs()
+ # Assume that pipelined adders are inferred in logic (not DSP)
+ # Assume that buffering uses BRAM
+ rscrs.add('BRAM_18kb', math.ceil(ColGlobals.BPI * sync_buff_depth * radix / hw.Bee7Fpga.BRAM_BYTES))
+ self.update_rsrcs(rscrs)
+
+ def do_func(self, in_data):
+ """
+ Gather partial dot products from inputs, add them together and spit them out
+ Perform sanity check to ensure that we are adding the correct things
+
+ Raises SimCompError if a stream does not carry items_per_stream items, if an item is
+ neither 'pp' nor 'null', or if the partial products of the inputs are inconsistent.
+ """
+ out_chans = dict()
+ # Iterate over each input
+ for di in in_data:
+ if len(di.items) != self.items_per_stream:
+ raise self.SimCompError('Incorrect items per stream. Expecting ' + str(self.items_per_stream))
+ # Deinterleave data
+ for do in range(len(di.items)):
+ (sid, coords) = rfnocsim.DataStream.submatrix_parse(di.items[do])
+ if sid == 'null':
+ # "Zero" partial products contribute nothing to the sum
+ continue
+ elif sid != 'pp':
+ raise self.SimCompError('Incorrect items. Expecting partial produts (pp) but got ' + sid)
+ if len(coords[1]) != 1:
+ raise self.SimCompError('Incorrect partial product. Target must be a single channel')
+ if coords[1][0] in out_chans:
+ out_chans[coords[1][0]].extend(coords[0])
+ else:
+ out_chans[coords[1][0]] = coords[0]
+ # Check if keys (targets) for partial products == items_per_stream
+ if len(list(out_chans.keys())) != self.items_per_stream:
+ raise self.SimCompError('Inconsistent partial products. Too many targets.')
+ # Verify that all influencers for each target are consistent
+ if not all(x == list(out_chans.values())[0] for x in list(out_chans.values())):
+ raise self.SimCompError('Inconsistent partial products. Influencers dont match.')
+ contrib_chans = list(out_chans.values())[0]
+ # Decide once whether the combined product gets aliased. With the default
+ # reducer_filter of (None, None) there is nothing to alias to, and calling
+ # sorted(None) would raise a TypeError, so that case is checked first.
+ do_alias = (self.reducer_filter[0] is not None and
+ sorted(self.reducer_filter[0]) == sorted(contrib_chans))
+ # Combine partial products and return
+ out_items = []
+ for ch in list(out_chans.keys()):
+ if do_alias:
+ out_items.append(rfnocsim.DataStream.submatrix_gen(self.reducer_filter[1], [ch]))
+ else:
+ out_items.append(rfnocsim.DataStream.submatrix_gen('pp', [contrib_chans, ch]))
+ return self.create_outdata_stream(in_data[0].bpi, out_items, in_data[0].count)
+
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE: The Torus Topology has not been maintained. Use at your own risk
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+class Topology_2D_4x4_Torus:
+ @classmethod
+ def config_bitstream(cls, bee7fpga, app_settings, in_chans, out_chans, total_num_chans, is_radio_node):
+ """
+ Defines the behavior of one FPGA in the torus: broadcasts the incoming sample streams,
+ instantiates the partial product computer and combiners and connects them to IO lanes.
+
+ config_bitstream(bee7fpga, app_settings, in_chans, out_chans, total_num_chans, is_radio_node):
+ - bee7fpga: The FPGA simulation object being configured
+ - app_settings: Application information
+ - in_chans: Input channels; must be 64 channels wide
+ - out_chans: Channels for which partial products are computed; must be 16 channels wide
+ - total_num_chans: Number of channels after which a partial product is aliased to 'tx'
+ - is_radio_node: If true, combined products are sent back on the lanes the samples came from
+ """
+ if len(in_chans) != 64:
+ raise bee7fpga.SimCompError('in_chans must be 64 channels wide. Got ' + str(len(in_chans)))
+ if len(out_chans) != 16:
+ raise bee7fpga.SimCompError('out_chans must be 16 channels wide. Got ' + str(len(out_chans)))
+ # Floor division keeps GRP_LEN an integer (it is used in range() and as a slice bound)
+ # even when "/" is true division
+ GRP_LEN = 16 // 2 # 2 radio channels per USRP
+
+ # Broadcast raw data streams to all internal and external FPGAs
+ for i in range(GRP_LEN):
+ in_ln = bee7fpga.EXT_IO_LANES[bee7fpga.BP_BASE+i]
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[in_ln], 0, bee7fpga.serdes_o[bee7fpga.EW_IO_LANES[i]], 0)
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[in_ln], 0, bee7fpga.serdes_o[bee7fpga.NS_IO_LANES[i]], 0)
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[in_ln], 0, bee7fpga.serdes_o[bee7fpga.XX_IO_LANES[i]], 0)
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[in_ln], 0, bee7fpga.serdes_o[bee7fpga.EXT_IO_LANES[bee7fpga.BP_BASE+8+i]], 0)
+ # Create an internal bus to hold the generated partial products
+ bee7fpga.pp_bus = dict()
+ for i in range(GRP_LEN):
+ bee7fpga.pp_bus[i] = rfnocsim.Channel(bee7fpga.sim_core, '%s/_INTERNAL_PP_%02d' % (bee7fpga.name,i))
+ # We need to compute partial products of the data that is broadcast to us
+ # pp_input_lanes represents the IO lanes that hold this data
+ pp_input_lanes = bee7fpga.EXT_IO_LANES[bee7fpga.BP_BASE:bee7fpga.BP_BASE+GRP_LEN] + \
+ bee7fpga.EW_IO_LANES[0:GRP_LEN] + bee7fpga.NS_IO_LANES[0:GRP_LEN] + bee7fpga.XX_IO_LANES[0:GRP_LEN]
+ # The function that computes the partial products
+ func = PartialContribComputer(
+ sim_core=bee7fpga.sim_core, name=bee7fpga.name + '/pp_computer/', size=len(pp_input_lanes),
+ dst_chans=out_chans,
+ items_per_stream=2, app_settings=app_settings)
+ for i in range(len(pp_input_lanes)):
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[pp_input_lanes[i]], 0, func, i)
+ for i in range(GRP_LEN): #Outputs of function
+ bee7fpga.sim_core.connect(func, i, bee7fpga.pp_bus[i], 0)
+ bee7fpga.add_function(func)
+ # Add a function combine all partial products (one per IO lane)
+ for i in range(GRP_LEN):
+ func = PartialContribCombiner(
+ sim_core=bee7fpga.sim_core, name=bee7fpga.name + '/pp_combiner_%d/' % (i),
+ radix=2, app_settings=app_settings, reducer_filter=(list(range(total_num_chans)), 'tx'))
+ # Partial products generated internally have to be added to a partial
+ # sum coming from outside
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[bee7fpga.EXT_IO_LANES[bee7fpga.FP_BASE+i]], 0, func, 0)
+ bee7fpga.sim_core.connect(bee7fpga.pp_bus[i], 0, func, 1)
+ # If this FPGA is hooked up to the radio then send partial products
+ # back to where the samples came from. Otherwise send it out to the PP output bus
+ if is_radio_node:
+ bee7fpga.sim_core.connect(func, 0, bee7fpga.serdes_o[bee7fpga.EXT_IO_LANES[bee7fpga.BP_BASE+i]], 0)
+ else:
+ bee7fpga.sim_core.connect(func, 0, bee7fpga.serdes_o[bee7fpga.EXT_IO_LANES[bee7fpga.FP_BASE+8+i]], 0)
+ bee7fpga.add_function(func)
+
+ @classmethod
+ def connect(cls, sim_core, usrps, bee7blades, hosts, app_settings):
+ """
+ Configures every FPGA of the 16 blades and wires up the USRPs, hosts and blades.
+
+ connect(sim_core, usrps, bee7blades, hosts, app_settings):
+ - sim_core: Simulator core object
+ - usrps: USRP simulation objects (indexed 0..127)
+ - bee7blades: BEE7 blade simulation objects (indexed 0..15, reshaped to a 4x4 grid)
+ - hosts: Host simulation objects (indexed 0..3)
+ - app_settings: Application information
+ """
+ USRPS_PER_BLADE = 32
+
+ # Create NULL source of "zero" partial products
+ null_items = ['null[(0);(0)]', 'null[(0);(0)]']
+ null_src = rfnocsim.Producer(sim_core, 'NULL_SRC', 4, null_items)
+ if app_settings['domain'] == 'frequency':
+ null_src.set_rate(app_settings['samp_rate']*(1.0 +
+ (float(app_settings['fft_overlap'])/app_settings['fft_size'])))
+ else:
+ null_src.set_rate(app_settings['samp_rate'])
+
+ # Reshape BEE7s
+ # The blades are arranged in 2D Torus network with 4 blades across
+ # each dimension (4x4 = 16)
+ bee7grid = []
+ for r in range(4):
+ bee7row = []
+ for c in range(4):
+ blade = bee7blades[4*r + c]
+ pp_chans = list(range(64*c,64*(c+1)))
+ for i in range(4):
+ Topology_2D_4x4_Torus.config_bitstream(
+ blade.fpgas[i], app_settings, pp_chans, pp_chans[i*16:(i+1)*16], 256, (r==c))
+ bee7row.append(blade)
+ bee7grid.append(bee7row)
+
+ # USRP-Bee7 Connections
+ # Blades across the diagonal are connected to USRPs
+ for b in range(4):
+ for u in range(USRPS_PER_BLADE):
+ # u//8 selects the FPGA on the blade; floor division keeps the lane index an integer
+ sim_core.connect_bidir(
+ usrps[USRPS_PER_BLADE*b + u], 0, bee7grid[b][b],
+ len(hw.Bee7Fpga.EXT_IO_LANES)*(u//8) + hw.Bee7Fpga.BP_BASE+(u%8), 'SAMP')
+ sim_core.connect_bidir(
+ hosts[b], 0, bee7grid[b][b], hw.Bee7Fpga.FP_BASE+8, 'CONFIG', ['blue','blue'])
+
+ # Bee7-Bee7 Connections
+ for r in range(4): # Traverse across row
+ for c in range(4): # Traverse across col
+ for f in range(4):
+ samp_in_base = len(hw.Bee7Fpga.EXT_IO_LANES)*f + hw.Bee7Fpga.BP_BASE
+ samp_out_base = len(hw.Bee7Fpga.EXT_IO_LANES)*f + hw.Bee7Fpga.BP_BASE+8
+ pp_in_base = len(hw.Bee7Fpga.EXT_IO_LANES)*f + hw.Bee7Fpga.FP_BASE
+ pp_out_base = len(hw.Bee7Fpga.EXT_IO_LANES)*f + hw.Bee7Fpga.FP_BASE+8
+ if r != c:
+ sim_core.connect_multi_bidir(
+ bee7grid[r][(c+3)%4], list(range(samp_out_base,samp_out_base+8)),
+ bee7grid[r][c], list(range(samp_in_base,samp_in_base+8)),
+ 'SAMP_O2I', ['black','blue'])
+ sim_core.connect_multi_bidir(
+ bee7grid[r][c], list(range(pp_out_base,pp_out_base+8)),
+ bee7grid[(r+1)%4][c], list(range(pp_in_base,pp_in_base+8)),
+ 'PP_O2I', ['black','blue'])
+ else:
+ # Diagonal (radio) blades feed "zero" partial products to the next row
+ for i in range(8):
+ sim_core.connect(null_src, 0, bee7grid[(r+1)%4][c], pp_in_base + i)
+
+class Topology_3D_4x4_FLB:
+ @classmethod
+ def get_radio_num(cls, router_addr, radio_idx, concentration):
+ """
+ Returns the global radio index given local radio info
+
+ (global_radio_idx) = get_radio_num(router_addr, radio_idx, concentration) where:
+ - router_addr: Address of the current FPGA (router) in 3-D space
+ - radio_idx: The local index of the radio for the current router_addr
+ - concentration: Number of USRPs connected to each router
+ """
+ DIM_SIZE = 4
+ multiplier = concentration
+ radio_num = 0
+ # Z is the fastest varying dimension, X the slowest
+ for dim in ['Z','Y','X']:
+ radio_num += router_addr[dim] * multiplier
+ multiplier *= DIM_SIZE
+ return radio_num + radio_idx
+
+ @classmethod
+ def get_portmap(cls, node_addr):
+ """
+ Returns the router and terminal connections for the current FPGA
+
+ (router_map, terminal_map) = get_portmap(node_addr) where:
+ - node_addr: Address of the current FPGA in 3-D space
+ - router_map: A double map indexed by the dimension {X,Y,Z} and the
+ FPGA address in that dimension that returns the Aurora
+ lane index that connects the current node to the neighbor.
+ Example: if node_addr = [0,0,0] then router_map['X'][1] will
+ hold the IO lane index that connects the current node with
+ its X-axis neighbor with address 1
+ - terminal_map: A single map that maps a dimension {X,Y,Z} to the starting
+ IO lane index for terminals (like USRPs) in that dimension.
+ A terminal is a leaf node in the network.
+ Only the X and Z entries are populated.
+ """
+ router_map = dict()
+ terminal_map = dict()
+ # If "node_addr" is the address of the current FPGA in the (X,Y,Z) space,
+ # then build a list of other addresses (neighbors) in each dimension
+ DIM_SIZE = 4
+ for dim in ['X','Y','Z']:
+ all_addrs = list(range(DIM_SIZE))
+ all_addrs.remove(node_addr[dim])
+ router_map[dim] = dict()
+ for dst in all_addrs:
+ router_map[dim][dst] = 0 # Assign lane index as 0 for now
+ # Assign Aurora lanes for all external connections between BEE7s
+ io_base = hw.Bee7Fpga.EXT_IO_LANES[0]
+
+ # ---- X-axis ----
+ # All BEE7s in the X dimension are connected via the RTM
+ # The first quad on the RTM is reserved for SFP+ peripherals like
+ # the USRPs, Ethernet switch ports, etc
+ # All others are used for inter BEE connections over QSFP+
+ terminal_map['X'] = io_base + hw.Bee7Fpga.BP_BASE
+ xdst = terminal_map['X'] + DIM_SIZE
+ for dst in router_map['X']:
+ router_map['X'][dst] = xdst
+ xdst += DIM_SIZE
+
+ # ---- Z-axis ----
+ # All BEE7s in the Z dimension are connected via FMC IO cards (front panel)
+ # To be symmetric with the X-axis the first quad on the FMC bus is also
+ # reserved (regardless of all quads being symmetric)
+ terminal_map['Z'] = io_base + hw.Bee7Fpga.FP_BASE
+ zdst = terminal_map['Z'] + DIM_SIZE
+ for dst in router_map['Z']:
+ router_map['Z'][dst] = zdst
+ zdst += DIM_SIZE
+
+ # ---- Y-axis ----
+ # Within a BEE7, FPGAs are connected in the Y-dimension:
+ # 0 - 1
+ # | X |
+ # 2 - 3
+ Y_LANE_MAP = {
+ 0:{1:hw.Bee7Fpga.EW_IO_LANES[0], 2:hw.Bee7Fpga.NS_IO_LANES[0], 3:hw.Bee7Fpga.XX_IO_LANES[0]},
+ 1:{0:hw.Bee7Fpga.EW_IO_LANES[0], 2:hw.Bee7Fpga.XX_IO_LANES[0], 3:hw.Bee7Fpga.NS_IO_LANES[0]},
+ 2:{0:hw.Bee7Fpga.NS_IO_LANES[0], 1:hw.Bee7Fpga.XX_IO_LANES[0], 3:hw.Bee7Fpga.EW_IO_LANES[0]},
+ 3:{0:hw.Bee7Fpga.XX_IO_LANES[0], 1:hw.Bee7Fpga.NS_IO_LANES[0], 2:hw.Bee7Fpga.EW_IO_LANES[0]}}
+ for dst in router_map['Y']:
+ router_map['Y'][dst] = Y_LANE_MAP[node_addr['Y']][dst]
+
+ return (router_map, terminal_map)
+
+ @classmethod
+ def config_bitstream(cls, bee7fpga, app_settings, fpga_addr):
+ """
+ Defines the FPGA behavior for the current FPGA. This function will
+ create the necessary simulation functions, connect them to IO lanes and
+ define the various utilization metrics for the image.
+
+ config_bitstream(bee7fpga, app_settings, fpga_addr):
+ - bee7fpga: The FPGA simulation object being configured
+ - fpga_addr: Address of the FPGA in 3-D space
+ - app_settings: Application information
+ """
+ if len(fpga_addr) != 3:
+ raise bee7fpga.SimCompError('fpga_addr must be 3-dimensional. Got ' + str(len(fpga_addr)))
+
+ # Map that stores lane indices for all neighbors of this node
+ (router_map, terminal_map) = cls.get_portmap(fpga_addr)
+ # USRPs are connected in the X dimension (RTM) because it has SFP+ ports
+ base_usrp_lane = terminal_map['X']
+
+ DIM_WIDTH = 4 # Dimension size for the 3-D network
+ MAX_USRPS = 4 # Max USRPs that can possibly be connected to each FPGA
+ NUM_USRPS = 2 # Number of USRPs actually connected to each FPGA
+ CHANS_PER_USRP = 2 # How many radio channels does each USRP have
+ ALL_CHANS = list(range(pow(DIM_WIDTH, 3) * NUM_USRPS * CHANS_PER_USRP))
+
+ # Each FPGA will forward the sample stream from each USRP to all of its
+ # X-axis neighbors
+ for ri in router_map['X']:
+ for li in range(MAX_USRPS): # li = GT Lane index
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[base_usrp_lane + li], 0, bee7fpga.serdes_o[router_map['X'][ri] + li], 0)
+
+ # Consequently, this FPGA will receive the USRP sample streams from each of
+ # its X-axis neighbors. Define an internal bus to aggregate all the neighbor
+ # streams with the native ones. Order the streams such that each FPGA sees the
+ # same data streams.
+ bee7fpga.int_samp_bus = dict()
+ for i in range(DIM_WIDTH):
+ for li in range(MAX_USRPS): # li = GT Lane index
+ bee7fpga.int_samp_bus[(MAX_USRPS*i) + li] = rfnocsim.Channel(
+ bee7fpga.sim_core, '%s/_INT_SAMP_%02d' % (bee7fpga.name,(MAX_USRPS*i) + li))
+ ln_base = base_usrp_lane if i == fpga_addr['X'] else router_map['X'][i]
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[ln_base + li], 0, bee7fpga.int_samp_bus[(MAX_USRPS*i) + li], 0)
+
+ # Forward the X-axis aggregated sample streams to all Y-axis neighbors
+ for ri in router_map['Y']:
+ for li in range(DIM_WIDTH*DIM_WIDTH): # li = GT Lane index
+ bee7fpga.sim_core.connect(bee7fpga.int_samp_bus[li], 0, bee7fpga.serdes_o[router_map['Y'][ri] + li], 0)
+
+ # What partial products will this FPGA compute?
+ # Generate channel list to compute partial products
+ pp_chans = list()
+ for cg in range(DIM_WIDTH): # cg = Channel group
+ for r in range(NUM_USRPS):
+ radio_num = cls.get_radio_num({'X':fpga_addr['X'], 'Y':fpga_addr['Y'], 'Z':cg}, r, NUM_USRPS)
+ for ch in range(CHANS_PER_USRP):
+ pp_chans.append(radio_num*CHANS_PER_USRP + ch)
+
+ # Instantiate partial product computer
+ bee7fpga.func_pp_comp = PartialContribComputer(
+ sim_core=bee7fpga.sim_core, name=bee7fpga.name+'/pp_computer/', size=DIM_WIDTH*DIM_WIDTH*NUM_USRPS,
+ dst_chans=pp_chans,
+ items_per_stream=CHANS_PER_USRP, app_settings=app_settings)
+ bee7fpga.add_function(bee7fpga.func_pp_comp)
+
+ # Partial product computer takes inputs from all Y-axis links
+ for sg in range(DIM_WIDTH): # sg = Group of sexdectects
+ for qi in range(DIM_WIDTH): # qi = GT Quad index
+ for li in range(NUM_USRPS):
+ func_inln = (sg * DIM_WIDTH * NUM_USRPS) + (qi * NUM_USRPS) + li
+ if sg == fpga_addr['Y']:
+ bee7fpga.sim_core.connect(bee7fpga.int_samp_bus[(qi * DIM_WIDTH) + li], 0,
+ bee7fpga.func_pp_comp, func_inln)
+ else:
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[router_map['Y'][sg] + (qi * DIM_WIDTH) + li], 0,
+ bee7fpga.func_pp_comp, func_inln)
+
+ # Internal bus to hold aggregated partial products
+ bee7fpga.pp_bus = dict()
+ for i in range(DIM_WIDTH*NUM_USRPS):
+ bee7fpga.pp_bus[i] = rfnocsim.Channel(bee7fpga.sim_core, '%s/_INT_PP_%02d' % (bee7fpga.name,i))
+ bee7fpga.sim_core.connect(bee7fpga.func_pp_comp, i, bee7fpga.pp_bus[i], 0)
+
+ # Forward partial products to Z-axis neighbors
+ for ri in router_map['Z']:
+ for li in range(NUM_USRPS): # li = GT Lane index
+ bee7fpga.sim_core.connect(bee7fpga.pp_bus[ri*NUM_USRPS + li], 0, bee7fpga.serdes_o[router_map['Z'][ri] + li], 0)
+
+ # Instantiate partial product adder
+ bee7fpga.func_pp_comb = dict()
+ for i in range(NUM_USRPS):
+ bee7fpga.func_pp_comb[i] = PartialContribCombiner(
+ sim_core=bee7fpga.sim_core, name=bee7fpga.name + '/pp_combiner_%d/'%(i),
+ radix=DIM_WIDTH, app_settings=app_settings, reducer_filter=(ALL_CHANS, 'tx'),
+ items_per_stream=CHANS_PER_USRP)
+ bee7fpga.add_function(bee7fpga.func_pp_comb[i])
+
+ # Aggregate partial products from Z-axis neighbors
+ for u in range(NUM_USRPS):
+ for ri in range(DIM_WIDTH):
+ if ri in router_map['Z']:
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[router_map['Z'][ri] + u], 0, bee7fpga.func_pp_comb[u], ri)
+ else:
+ # ri is this FPGA's own Z address: use the locally computed partial product
+ bee7fpga.sim_core.connect(bee7fpga.pp_bus[ri*NUM_USRPS + u], 0, bee7fpga.func_pp_comb[u], ri)
+
+ # Send the output of each partial product adder out on the corresponding USRP lane
+ for u in range(NUM_USRPS):
+ bee7fpga.sim_core.connect(bee7fpga.func_pp_comb[u], 0, bee7fpga.serdes_o[base_usrp_lane + u], 0)
+
+ # Coefficient consumer
+ bee7fpga.coeff_sink = rfnocsim.Consumer(bee7fpga.sim_core, bee7fpga.name + '/coeff_sink', 10e9/8, 0.0)
+ bee7fpga.sim_core.connect(bee7fpga.serdes_i[terminal_map['X'] + NUM_USRPS], 0, bee7fpga.coeff_sink, 0)
+
+ @classmethod
+ def connect(cls, sim_core, usrps, bee7blades, hosts, app_settings):
+ """
+ Configures every FPGA of the 16 blades and wires up the USRPs, blades and hosts.
+
+ connect(sim_core, usrps, bee7blades, hosts, app_settings):
+ - sim_core: Simulator core object
+ - usrps: USRP simulation objects, indexed by the result of get_radio_num
+ - bee7blades: BEE7 blade simulation objects (indexed 0..15, reshaped to a 4x4 grid)
+ - hosts: Host simulation objects (indexed 0..3, one per row)
+ - app_settings: Application information
+ """
+ NUM_USRPS = 2
+
+ # Reshape BEE7s
+ # The blades are arranged in 3D Flattened Butterfly configuration
+ # with a dimension width of 4. The X and Z dimension represent row, col
+ # and the Y dimension represents the internal connections
+ bee7grid = []
+ for r in range(4):
+ bee7row = []
+ for c in range(4):
+ blade = bee7blades[4*r + c]
+ for f in range(blade.NUM_FPGAS):
+ cls.config_bitstream(blade.fpgas[f], app_settings, {'X':r, 'Y':f, 'Z':c})
+ bee7row.append(blade)
+ bee7grid.append(bee7row)
+
+ # USRP-Bee7 Connections
+ # Every FPGA (x, y, z) is connected to NUM_USRPS USRPs
+ for x in range(4):
+ for y in range(4):
+ for z in range(4):
+ for u in range(NUM_USRPS):
+ usrp_num = cls.get_radio_num({'X':x,'Y':y,'Z':z}, u, NUM_USRPS)
+ (router_map, terminal_map) = cls.get_portmap({'X':x,'Y':y,'Z':z})
+ sim_core.connect_bidir(
+ usrps[usrp_num], 0,
+ bee7grid[x][z], hw.Bee7Blade.io_lane(y, terminal_map['X'] + u), 'SAMP')
+
+ # Bee7-Bee7 Connections
+ for row in range(4):
+ for col in range(4):
+ for fpga in range(4):
+ (src_map, t) = cls.get_portmap({'X':row,'Y':fpga,'Z':col})
+ for dst in range(4):
+ if row != dst:
+ (dst_map, t) = cls.get_portmap({'X':dst,'Y':fpga,'Z':col})
+ sim_core.connect_multi(
+ bee7grid[row][col],
+ list(range(hw.Bee7Blade.io_lane(fpga, src_map['X'][dst]), hw.Bee7Blade.io_lane(fpga, src_map['X'][dst]+4))),
+ bee7grid[dst][col],
+ list(range(hw.Bee7Blade.io_lane(fpga, dst_map['X'][row]), hw.Bee7Blade.io_lane(fpga, dst_map['X'][row]+4))),
+ 'SAMP')
+ if col != dst:
+ (dst_map, t) = cls.get_portmap({'X':row,'Y':fpga,'Z':dst})
+ sim_core.connect_multi(
+ bee7grid[row][col],
+ list(range(hw.Bee7Blade.io_lane(fpga, src_map['Z'][dst]), hw.Bee7Blade.io_lane(fpga, src_map['Z'][dst]+4))),
+ bee7grid[row][dst],
+ list(range(hw.Bee7Blade.io_lane(fpga, dst_map['Z'][col]), hw.Bee7Blade.io_lane(fpga, dst_map['Z'][col]+4))),
+ 'PP', 'blue')
+
+ # Host connection
+ for row in range(4):
+ for col in range(4):
+ for fpga in range(4):
+ # Address the FPGA being connected (Y is the FPGA index within the blade).
+ # The terminal lane returned by get_portmap does not depend on Y.
+ (router_map, terminal_map) = cls.get_portmap({'X':row,'Y':fpga,'Z':col})
+ sim_core.connect_bidir(
+ hosts[row], col*4 + fpga,
+ bee7grid[row][col], hw.Bee7Blade.io_lane(fpga, terminal_map['X'] + NUM_USRPS), 'COEFF', 'red')
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/ni_hw_models.py b/fpga/usrp3/tools/utils/rfnoc-system-sim/ni_hw_models.py
new file mode 100755
index 000000000..815003c5f
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/ni_hw_models.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Ettus Research
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import rfnocsim
+import math
+
class UsrpX310(rfnocsim.SimComp):
    """
    Simulation model for the USRP X310
    - Has two producers and consumers of FFT data
    - Computes bandwidth and latency using FFT size and overlap
    """
    # Hardware specific constants
    RADIO_LATENCY = 1e-6
    IO_LATENCY = 1e-6
    MAX_SAMP_RATE = 300e6   # Limited by 10GbE
    BPI = 4                 # Bytes per sample (item)

    def __init__(self, sim_core, index, app_settings):
        """
        Create the two producers (RX data) and two consumers (TX data) of one USRP.

        sim_core:     simulator core this component registers with
        index:        USRP number; used for the component name and to derive
                      the radio numbers 2*index and 2*index+1
        app_settings: dict; 'domain' and 'samp_rate' are always read,
                      'fft_size' and 'fft_overlap' only in the 'frequency' domain
        """
        rfnocsim.SimComp.__init__(self, sim_core, name='USRP_%03d' % (index), ctype=rfnocsim.comptype.hardware)
        freq_domain = (app_settings['domain'] == 'frequency')
        # USRP i carries data for radio 2i and 2i+1 interleaved into one stream
        self.index = index
        items = [rfnocsim.DataStream.submatrix_gen('rx', [2*index]),
                 rfnocsim.DataStream.submatrix_gen('rx', [2*index+1])]
        # Samples are 4 bytes I and Q
        latency = (self.RADIO_LATENCY + self.IO_LATENCY/2) * self.get_tick_rate()
        if freq_domain:
            # Max latency per direction depends on the FFT size and sample rate
            latency += self.__get_fft_latency(
                app_settings['fft_size'], app_settings['samp_rate'], self.get_tick_rate())
        # An X310 Radio has two producers (RX data) and consumers (TX data) (i.e. two ethernet ports)
        # Both ports can carry data from both radio frontends
        self.sources = [
            rfnocsim.Producer(sim_core, self.name + '/TX0', self.BPI, items, self.MAX_SAMP_RATE, latency),
            rfnocsim.Producer(sim_core, self.name + '/TX1', self.BPI, items, self.MAX_SAMP_RATE, latency)]
        self.sinks = [
            rfnocsim.Consumer(sim_core, self.name + '/RX0', self.BPI * self.MAX_SAMP_RATE, latency),
            rfnocsim.Consumer(sim_core, self.name + '/RX1', self.BPI * self.MAX_SAMP_RATE, latency)]
        # The actual sample rate over the wire depends on the radio sample rate,
        # the FFT size and the FFT overlap. It is the same for every source, so
        # compute it once.
        if freq_domain:
            wire_rate = app_settings['samp_rate'] * \
                (1.0 + (float(app_settings['fft_overlap'])/app_settings['fft_size']))
        else:
            wire_rate = app_settings['samp_rate']
        for src in self.sources:
            src.set_rate(wire_rate)

    def inputs(self, i, bind=False):
        """Return the input endpoint of sink (consumer) number i."""
        return self.sinks[i].inputs(0, bind)

    def connect(self, i, dest):
        """Connect source (producer) number i to the input endpoint dest."""
        self.sources[i].connect(0, dest)

    def get_utilization(self, what):
        """This model tracks no resources: utilization is always 0.0."""
        return 0.0

    def get_util_attrs(self):
        """This model exposes no utilization attributes."""
        return []

    def validate(self, chan):
        """
        Check the streams received on sink number chan.

        Raises RuntimeError if a received item is not a 'tx' submatrix or if
        the received row indices are not exactly [2*index, 2*index + 1].
        """
        idxs = []
        for item in self.sinks[chan].get_items():
            (str_id, idx) = rfnocsim.DataStream.submatrix_parse(item)
            if str_id != 'tx':
                raise RuntimeError(self.name + ' received incorrect TX data on channel ' + str(chan))
            # First element of the first coordinate tuple
            idxs.append(idx[0][0])
        if sorted(idxs) != [self.index*2, self.index*2 + 1]:
            raise RuntimeError(self.name + ' received incorrect TX data. Got: ' + str(sorted(idxs)))

    def __get_fft_latency(self, fft_size, samp_rate, tick_rate):
        """
        Return the FFT latency in ticks.

        The latency is the larger of the time needed to leave the FFT
        (cycle count at FFT_CLK_RATE) and the time needed to fill it
        (fft_size samples at samp_rate).
        Raises KeyError for an FFT size that is not in the cycle table.
        """
        FFT_CLK_RATE = 200e6
        fft_cycles = {128:349, 256:611, 512:1133, 1024:2163, 2048:4221, 4096:8323}
        if fft_size not in fft_cycles:
            # Same exception type as the plain lookup, but with a usable message
            raise KeyError('Unsupported FFT size %s. Supported sizes: %s' %
                           (fft_size, sorted(fft_cycles.keys())))
        latency = max(
            fft_cycles[fft_size] / FFT_CLK_RATE,    # Min time to leave FFT
            fft_size / float(samp_rate))            # Min time to enter FFT
        return latency * tick_rate
+
+
class Bee7Fpga(rfnocsim.SimComp):
    """
    Simulation model for a single Beecube BEE7 FPGA
    - Type = hardware
    - Contains 80 IO lanes per FPGA: 16 each to neighboring
      FPGAs and 32 lanes going outside
    """
    # IO lanes (How the various IO lanes in an FPGA are allocated)
    EW_IO_LANES = list(range(0,16))
    NS_IO_LANES = list(range(16,32))
    XX_IO_LANES = list(range(32,48))
    EXT_IO_LANES = list(range(48,80))
    # External IO lane connections
    FP_BASE = 0     # Front panel FMC
    FP_LANES = 16
    BP_BASE = 16    # Backplane RTM
    BP_LANES = 16

    # Hardware specific constants
    IO_LN_LATENCY = 1.5e-6
    IO_LN_BW = 10e9/8
    ELASTIC_BUFF_FULLNESS = 0.5
    BRAM_BYTES = 18e3/8

    def __init__(self, sim_core, name):
        """
        Create the FPGA with one input and one output channel per IO lane
        and account for the block RAM used by lane buffering and infrastructure.

        sim_core: simulator core this component registers with
        name:     unique component name; also the prefix of all lane names
        """
        self.sim_core = sim_core
        rfnocsim.SimComp.__init__(self, sim_core, name, rfnocsim.comptype.hardware)
        # Max resources from Virtex7 datasheet
        self.max_resources = rfnocsim.HwRsrcs()
        self.max_resources.add('DSP', 3600)
        self.max_resources.add('BRAM_18kb', 2940)
        self.resources = rfnocsim.HwRsrcs()
        # Each FPGA has 80 SERDES lanes
        self.max_io = 80
        self.serdes_i = dict()
        self.serdes_o = dict()
        # Each lane can carry at most 10 Gb/s (IO_LN_BW is expressed in bytes/s)
        # Each SERDES needs to have some buffering. We assume elastic buffering (50% full on avg).
        io_buff_size = (self.IO_LN_BW * self.IO_LN_LATENCY) / self.ELASTIC_BUFF_FULLNESS
        # Worst case lane latency; split evenly between the input and output channel
        lane_latency = self.IO_LN_LATENCY * self.get_tick_rate()
        for i in range(self.max_io):
            self.serdes_i[i] = rfnocsim.Channel(sim_core, self.__ioln_name(i)+'/I', self.IO_LN_BW, lane_latency / 2)
            self.serdes_o[i] = rfnocsim.Channel(sim_core, self.__ioln_name(i)+'/O', self.IO_LN_BW, lane_latency / 2)
            self.resources.add('BRAM_18kb', 1 + math.ceil(io_buff_size / self.BRAM_BYTES)) #input buffering per lane
            self.resources.add('BRAM_18kb', 1) #output buffering per lane
        # Other resources
        self.resources.add('BRAM_18kb', 72)     # BPS infrastructure + microblaze
        self.resources.add('BRAM_18kb', 128)    # 2 MIGs

        self.functions = dict()

    def inputs(self, i, bind=False):
        """Return the input endpoint of IO lane i."""
        return self.serdes_i[i].inputs(0, bind)

    def connect(self, i, dest):
        """Connect the output of IO lane i to the input endpoint dest."""
        self.serdes_o[i].connect(0, dest)

    def get_utilization(self, what):
        """
        Return used/available for resource `what` as a fraction.
        Resources without a configured maximum report 0.0.
        """
        limit = self.max_resources.get(what)
        if limit != 0:
            return self.resources.get(what) / limit
        return 0.0

    def get_util_attrs(self):
        """Names of the resources for which utilization is reported."""
        return ['DSP', 'BRAM_18kb']

    def rename(self, name):
        """Change the name attribute of this component."""
        self.name = name

    def add_function(self, func):
        """
        Place function `func` in this FPGA and add its resources to the
        FPGA's resource usage.
        Raises RuntimeError if a function with the same name was already added.
        """
        if func.name in self.functions:
            # Report the name of the offending function, not of the FPGA
            raise RuntimeError('Function ' + func.name + ' already defined in ' + self.name)
        self.functions[func.name] = func
        self.resources.merge(func.get_rsrcs())

    def __ioln_name(self, i):
        """Return the name of IO lane i: FPGA name, lane group and index within the group."""
        if i in self.EW_IO_LANES:
            return '%s/SER_EW_%02d'%(self.name,i-self.EW_IO_LANES[0])
        elif i in self.NS_IO_LANES:
            return '%s/SER_NS_%02d'%(self.name,i-self.NS_IO_LANES[0])
        elif i in self.XX_IO_LANES:
            return '%s/SER_XX_%02d'%(self.name,i-self.XX_IO_LANES[0])
        else:
            return '%s/SER_EXT_%02d'%(self.name,i-self.EXT_IO_LANES[0])
+
class Bee7Blade(rfnocsim.SimComp):
    """
    Simulation model for a single Beecube BEE7
    - Contains 4 FPGAs (fully connected with 16 lanes)
    """
    NUM_FPGAS = 4
    # FPGA positions in the blade
    NW_FPGA = 0
    NE_FPGA = 1
    SW_FPGA = 2
    SE_FPGA = 3

    def __init__(self, sim_core, index):
        """
        Create the four FPGAs of blade number `index` and wire them together.

        NOTE(review): the SimComp constructor is not called here, so the blade
        itself is not registered with the simulator core; only its FPGAs are.
        """
        self.sim_core = sim_core
        self.name = name = 'BEE7_%03d' % (index)
        # Add FPGAs
        names = ['FPGA_NW', 'FPGA_NE', 'FPGA_SW', 'FPGA_SE']
        self.fpgas = [Bee7Fpga(sim_core, name + '/' + names[i]) for i in range(self.NUM_FPGAS)]
        # Build a fully connected network of FPGAs:
        # 6 bidirectional links = 12 directed connections
        links = [
            (self.NW_FPGA, self.NE_FPGA, Bee7Fpga.EW_IO_LANES),
            (self.NW_FPGA, self.SW_FPGA, Bee7Fpga.NS_IO_LANES),
            (self.NW_FPGA, self.SE_FPGA, Bee7Fpga.XX_IO_LANES),
            (self.NE_FPGA, self.SW_FPGA, Bee7Fpga.XX_IO_LANES),
            (self.NE_FPGA, self.SE_FPGA, Bee7Fpga.NS_IO_LANES),
            (self.SW_FPGA, self.SE_FPGA, Bee7Fpga.EW_IO_LANES)]
        for (ep1, ep2, lanes) in links:
            self.sim_core.connect_multi_bidir(self.fpgas[ep1], lanes, self.fpgas[ep2], lanes)

    def __locate(self, i):
        """
        Translate blade-level external lane index i into (fpga, fpga_lane).
        Raises IndexError for an index outside the blade's external lanes.
        """
        io_per_fpga = len(Bee7Fpga.EXT_IO_LANES)
        if not 0 <= i < len(self.fpgas) * io_per_fpga:
            # Negative indices would otherwise silently wrap onto another lane
            raise IndexError('%s has no external IO lane %s' % (self.name, i))
        (fpga, lane) = divmod(i, io_per_fpga)
        return (self.fpgas[fpga], Bee7Fpga.EXT_IO_LANES[lane])

    def inputs(self, i, bind=False):
        """Return the input endpoint of blade-level external lane i."""
        (fpga, lane) = self.__locate(i)
        return fpga.inputs(lane, bind)

    def connect(self, i, dest):
        """Connect the output of blade-level external lane i to the input endpoint dest."""
        (fpga, lane) = self.__locate(i)
        fpga.connect(lane, dest)

    @staticmethod
    def io_lane(fpga, fpga_lane):
        """
        Return the blade-level external lane index for lane `fpga_lane`
        (an FPGA lane number in the EXT_IO_LANES range) of FPGA number `fpga`.
        """
        IO_PER_FPGA = len(Bee7Fpga.EXT_IO_LANES)
        return (fpga_lane - Bee7Fpga.EXT_IO_LANES[0]) + (fpga * IO_PER_FPGA)
+
class ManagementHostandSwitch(rfnocsim.SimComp):
    """
    Simulation model for a management host computer
    - Sources channel coefficients
    - Configures radio
    """
    def __init__(self, sim_core, index, num_coeffs, switch_ports, app_settings):
        """
        Create one coefficient producer and one ACK consumer per switch port.

        sim_core:     simulator core this component registers with
        index:        host number; used in the component and stream names
        num_coeffs:   scales the coefficient rate of every port
        switch_ports: number of ports; each gets an equal share of 10e9/8 as max rate
        app_settings: dict; reads 'domain', 'coherence_rate' and either
                      'fft_size' ('frequency' domain) or 'fir_taps' (otherwise)
        """
        rfnocsim.SimComp.__init__(self, sim_core, name='MGMT_HOST_%03d'%(index), ctype=rfnocsim.comptype.other)
        if app_settings['domain'] == 'frequency':
            k = app_settings['fft_size']
        else:
            k = app_settings['fir_taps']

        self.sources = dict()
        self.sinks = dict()
        for port in range(switch_ports):
            self.sources[port] = rfnocsim.Producer(
                sim_core, '%s/COEFF_%d'%(self.name,port), 4, ['coeff_%03d[%d]'%(index,port)], (10e9/8)/switch_ports, 0)
            # The host name is already part of the format string; prepending
            # self.name again produced names like 'MGMT_HOST_000MGMT_HOST_000/ACK0'
            self.sinks[port] = rfnocsim.Consumer(sim_core, '%s/ACK%d'%(self.name,port))
            self.sources[port].set_rate(k*num_coeffs*app_settings['coherence_rate'])

    def inputs(self, i, bind=False):
        """Return the input endpoint of the ACK consumer on port i."""
        return self.sinks[i].inputs(0, bind)

    def connect(self, i, dest):
        """Connect the coefficient producer on port i to the input endpoint dest."""
        self.sources[i].connect(0, dest)

    def get_utilization(self, what):
        """This model tracks no resources: utilization is always 0.0."""
        return 0.0

    def get_util_attrs(self):
        """This model exposes no utilization attributes."""
        return []
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/rfnocsim.py b/fpga/usrp3/tools/utils/rfnoc-system-sim/rfnocsim.py
new file mode 100644
index 000000000..d841cc06b
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/rfnocsim.py
@@ -0,0 +1,757 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Ettus Research
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import collections
+import copy
+import re
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mticker
+from graphviz import Digraph
+
+#------------------------------------------------------------
+# Simulator Core Components
+#------------------------------------------------------------
class comptype():
    """
    Enumeration of the simulation component types.
    Each member is the human readable name of the type.
    """
    # Basic network components
    producer = 'Producer'
    consumer = 'Consumer'
    channel  = 'Channel'
    function = 'Function'
    # Composite / miscellaneous components
    hardware = 'Hardware'
    other    = 'Other'
+
class SimulatorCore:
    """
    Core simulation engine:
    This class owns all the simulation components and
    manages time and other housekeeping operations.
    """

    def __init__(self, tick_rate):
        """Create an empty simulation that advances at tick_rate ticks per second."""
        self.__ticks = 0
        self.__tick_rate = tick_rate
        self.__tick_aware_comps = list()
        self.__all_comps = dict()
        # Entries: (src name, dst name, weight, label, color)
        self.__edge_render_db = list()

    @staticmethod
    def __as_pair(value):
        """
        Normalize a per-direction render attribute to a two element sequence.
        A falsy value becomes [None, None], a scalar is duplicated and a
        list/tuple is returned unchanged.
        """
        if not value:
            return [None, None]
        if isinstance(value, (list, tuple)):
            return value
        return [value, value]

    def register(self, comp, tick_aware):
        """
        Add component comp to the simulation. Tick-aware components get their
        tick() method called on every simulation tick.
        Raises RuntimeError if a component with the same name already exists.
        """
        if comp.name in self.__all_comps:
            raise RuntimeError('Duplicate component ' + comp.name)
        self.__all_comps[comp.name] = comp
        if tick_aware:
            self.__tick_aware_comps.append(comp)

    def connect(self, src, srcport, dst, dstport, render_label=None, render_color=None):
        """
        Connect output srcport of src to input dstport of dst (binding the input).
        The edge is only recorded for rendering when render_label is given.
        """
        src.connect(srcport, dst.inputs(dstport, bind=True))
        if render_label:
            self.__edge_render_db.append(
                (src.name, dst.name, 1.0, render_label, render_color))

    def connect_bidir(self, ep1, ep1port, ep2, ep2port, render_labels=None, render_colors=None):
        """
        Connect ep1 -> ep2 and ep2 -> ep1 on the given ports.
        render_labels / render_colors may be a single value (used for both
        directions) or a two element list/tuple (one per direction).
        """
        labels = self.__as_pair(render_labels)
        colors = self.__as_pair(render_colors)
        self.connect(ep1, ep1port, ep2, ep2port, labels[0], colors[0])
        self.connect(ep2, ep2port, ep1, ep1port, labels[1], colors[1])

    def connect_multi(self, src, srcports, dst, dstports, render_label=None, render_color=None):
        """
        Connect each port in srcports to the port at the same position in dstports.
        A single render edge weighted by the number of ports is recorded when
        render_label is given.
        Raises RuntimeError if the two port lists differ in length.
        """
        if len(srcports) != len(dstports):
            raise RuntimeError(
                'Source and destination ports should be of the same length')
        for (srcport, dstport) in zip(srcports, dstports):
            src.connect(srcport, dst.inputs(dstport, bind=True))
        if render_label:
            self.__edge_render_db.append((src.name, dst.name, float(len(srcports)), render_label, render_color))

    def connect_multi_bidir(self, ep1, ep1port, ep2, ep2port, render_labels=None, render_colors=None):
        """
        Bidirectional version of connect_multi; ep1port and ep2port are port lists.
        render_labels / render_colors follow the same rules as in connect_bidir.
        """
        labels = self.__as_pair(render_labels)
        colors = self.__as_pair(render_colors)
        self.connect_multi(ep1, ep1port, ep2, ep2port, labels[0], colors[0])
        self.connect_multi(ep2, ep2port, ep1, ep1port, labels[1], colors[1])

    def list_components(self, comptype='', name_filt=''):
        """
        Return the sorted names of all registered components whose name
        matches the regular expression name_filt (anchored at the start, as
        with re.match) and, if comptype is given, whose type equals comptype.
        """
        return sorted(
            name for (name, comp) in self.__all_comps.items()
            if (not comptype or comp.type == comptype) and re.match(name_filt, comp.name))

    def lookup(self, comp_name):
        """Return the component registered as comp_name (KeyError if unknown)."""
        return self.__all_comps[comp_name]

    def tick(self):
        """Advance the simulation by one tick and tick all tick-aware components."""
        self.__ticks += 1
        for c in self.__tick_aware_comps:
            c.tick()

    def run(self, time_s):
        """Run the simulation for time_s seconds, i.e. int(time_s * tick_rate) ticks."""
        for _ in range(int(time_s * self.__tick_rate)):
            self.tick()

    def get_ticks(self):
        """Return the number of ticks elapsed so far."""
        return self.__ticks

    def get_tick_rate(self):
        """Return the tick rate passed to the constructor."""
        return self.__tick_rate

    def network_to_dot(self):
        """
        Return a graphviz Digraph with one node per component that appears in
        a recorded edge and one edge per recorded connection.
        """
        dot = Digraph(comment='RFNoC Network Topology')
        # Nodes get consecutive numeric ids in order of first appearance
        node_ids = dict()
        for edgeinfo in self.__edge_render_db:
            for node in edgeinfo[:2]:
                if node not in node_ids:
                    node_ids[node] = len(node_ids) + 1
                    dot.node(str(node_ids[node]), node)
        for (src, dst, weight, label, color) in self.__edge_render_db:
            dot.edge(
                tail_name=str(node_ids[src]),
                head_name=str(node_ids[dst]),
                label=label,
                weight=str(weight), penwidth=str(weight/2),
                color=str(color if color else 'black'))
        return dot
+
class SimComp:
    """
    Base class of every simulation component.
    Stores the component name and type and registers the
    component with the simulator core on construction.
    """

    def __init__(self, sim_core, name, ctype):
        self.__sim_core = sim_core
        self.name = name
        self.type = ctype
        # Only producers need to be called on every tick
        is_tick_aware = (ctype == comptype.producer)
        self.__sim_core.register(self, is_tick_aware)

    def get_ticks(self):
        """Current tick count of the owning simulator core."""
        core = self.__sim_core
        return core.get_ticks()

    def get_tick_rate(self):
        """Tick rate of the owning simulator core."""
        core = self.__sim_core
        return core.get_tick_rate()

    def SimCompError(self, msg):
        """Raise a RuntimeError carrying msg followed by the component name in brackets."""
        text = ''.join([msg, ' [', self.name, ']'])
        raise RuntimeError(text)
+
+#------------------------------------------------------------
+# Data stream components
+#------------------------------------------------------------
class HwRsrcs():
    """
    Container for hardware resource counts.
    Maps a resource name (chosen freely by the simulation) to a float
    amount, which can then be used to report utilization.
    """

    def __init__(self):
        self.__rsrcs = dict()

    def get(self, what):
        """Amount stored for resource `what`; 0.0 when it was never stored."""
        try:
            return self.__rsrcs[what]
        except KeyError:
            return 0.0

    def set(self, what, value):
        """Overwrite the amount of resource `what` with float(value)."""
        self.__rsrcs[what] = float(value)

    def add(self, what, value):
        """Increase the amount of resource `what` by float(value), creating it if needed."""
        amount = float(value)
        try:
            self.__rsrcs[what] += amount
        except KeyError:
            self.__rsrcs[what] = amount

    def merge(self, other_rsrcs):
        """Add every resource held by other_rsrcs to this container."""
        for name in other_rsrcs.get_attrs():
            self.add(name, other_rsrcs.get(name))

    def get_attrs(self):
        """List of the resource names currently stored."""
        return list(self.__rsrcs.keys())

    def reset(self, what = None):
        """
        Zero the amount of resource `what` if it is stored.
        Without an argument, forget all resources.
        """
        if what is None:
            self.__rsrcs = dict()
            return
        if what in self.__rsrcs:
            self.__rsrcs[what] = 0.0
+
class DataStream:
    """
    Data Stream Object:
    Holds information about a data stream that passes through various blocks.
    The simulator simulates events on the actual stream so each stream object
    must have a unique payload (items) to disambiguate it from the rest.
    """
    # One entry of a stream's path. For the first hop of a produced stream,
    # `latency` holds the tick count at creation time instead of a latency.
    HopInfo = collections.namedtuple('HopInfo', ['location', 'latency'])

    class HopDb():
        """Read-only view on the list of hops a stream has taken."""

        def __init__(self, hops):
            self.__hops = hops

        def get_src(self):
            """Location of the first hop."""
            return self.__hops[0].location

        def get_dst(self):
            """Location of the last hop."""
            return self.__hops[-1].location

        def get_hops(self):
            """Locations of all hops, in path order."""
            return [h.location for h in self.__hops]

        def get_latency(self, ticks, location = ''):
            """
            Return the latency in ticks accumulated up to and including the
            hop at `location`, measured at time `ticks`. If `location` does
            not match any hop, the latency of the whole path is returned.
            """
            latency = ticks - self.__hops[0].latency #Hop0 always has the init timestamp
            if (self.__hops[0].location != location):
                for hop in self.__hops[1:]:
                    latency += hop.latency
                    if (hop.location == location):
                        break
            return latency

    def __init__(self, bpi, items, count, producer=None, parent=None):
        """
        bpi:      bytes per item
        items:    iterable of item identifiers carried by the stream (copied)
        count:    multiplier applied to the item count in get_bytes()
        producer: component that generated the stream; starts a new hop list
        parent:   stream to inherit the hop list from
        Exactly one of producer and parent must be given, else RuntimeError.
        """
        self.bpi = bpi
        self.items = []
        self.items.extend(items)
        self.count = count
        self.__hops = list()
        if producer and parent:
            raise RuntimeError('Data stream cannot have both a producer and a parent stream')
        elif producer:
            self.__hops.append(self.HopInfo(location='Gen@'+producer.name, latency=producer.get_ticks()))
        elif parent:
            self.__hops.extend(parent.get_hops())
        else:
            raise RuntimeError('Data stream must have a producer or a parent stream')

    def add_hop(self, location, latency):
        """Append a hop to the path of this stream."""
        self.__hops.append(self.HopInfo(location=location, latency=latency))

    def get_hops(self):
        """Return the list of HopInfo entries (not a copy)."""
        return self.__hops

    def get_bytes(self):
        """Size of the stream: bytes per item * number of items * count."""
        return self.bpi * len(self.items) * self.count

    #
    # Type specific methods
    #
    @staticmethod
    def submatrix_gen(matrix_id, coordinates):
        """
        Build a stream id of the form 'id[(a,b);(c)]' from a matrix id and a
        list of coordinates. Each coordinate is either a scalar or an iterable
        of scalars.
        """
        # collections.Iterable was removed in Python 3.10
        try:
            from collections.abc import Iterable
        except ImportError:     # Python 2
            from collections import Iterable
        coord_arr = []
        for c in coordinates:
            if isinstance(c, Iterable):
                coord_arr.append('(' + (','.join(str(x) for x in c)) + ')')
            else:
                coord_arr.append('(' + str(c) + ')')
        return matrix_id + '[' + ';'.join(coord_arr) + ']'

    @staticmethod
    def submatrix_parse(stream_id):
        """
        Inverse of submatrix_gen: return (matrix_id, coords) where coords is a
        list with one list of ints per coordinate.
        """
        # Raw strings: '\[' is not a valid escape in a normal string literal
        m = re.match(r'(.+)\[(.*)\]', stream_id)
        matrix_id = m.group(1)
        coords = []
        for cstr in m.group(2).split(';'):
            coords.append([int(x) for x in re.match(r'\((.+)\)', cstr).group(1).split(',')])
        return (matrix_id, coords)
+
+#------------------------------------------------------------
+# Basic Network components
+#------------------------------------------------------------
+
+# Producer object.
class Producer(SimComp):
    """
    Producer Block:
    Generates data at a constant rate
    """

    def __init__(self, sim_core, name, bpi, items, max_samp_rate = float('inf'), latency = 0):
        """
        bpi:           bytes per item
        items:         item identifiers placed in every generated stream
        max_samp_rate: maximum sample rate; with bpi it gives the bandwidth
                       used as the reference for utilization
        latency:       latency (ticks) recorded as this block's hop
        The initial rate equals the tick rate; use set_rate() to change it.
        """
        SimComp.__init__(self, sim_core, name, comptype.producer)
        self.__bpi = bpi
        self.__items = items
        self.__bw = max_samp_rate * bpi
        self.__latency = latency
        self.__dests = list()
        self.__data_count = 0
        self.__byte_count = 0
        self.__backpressure_ticks = 0
        self.set_rate(self.get_tick_rate())

    def inputs(self, i, bind=False):
        """A producer has no inputs: always raises RuntimeError."""
        raise self.SimCompError('This is a producer block. Cannot connect another block to it.')

    def connect(self, i, dest):
        """Add dest to the destinations that receive every generated stream."""
        self.__dests.append(dest)

    def set_rate(self, samp_rate):
        """Set the sample rate; stored as the per-tick count samp_rate / tick_rate."""
        self.__data_count = samp_rate / self.get_tick_rate()

    def tick(self):
        """
        Generate one stream and push a copy to every destination, provided
        all destinations are ready. Otherwise count the tick as backpressure.
        Does nothing while no destination is connected.
        """
        if not self.__dests:
            return
        if not all(dest.is_ready() for dest in self.__dests):
            self.__backpressure_ticks += 1
            return
        data = DataStream(
            bpi=self.__bpi, items=self.__items, count=self.__data_count, producer=self)
        if self.__backpressure_ticks > 0:
            # Time spent waiting for the destinations shows up as its own hop
            data.add_hop('BP@'+self.name, self.__backpressure_ticks)
        data.add_hop(self.name, self.__latency)
        for dest in self.__dests:
            # Each destination extends the hop list independently
            dest.push(copy.deepcopy(data))
        self.__byte_count += data.get_bytes()
        self.__backpressure_ticks = 0

    def get_bytes(self):
        """Total number of bytes generated so far."""
        return self.__byte_count

    def get_util_attrs(self):
        """Names of the attributes for which utilization is reported."""
        return ['bandwidth']

    def get_utilization(self, what):
        """
        Return the average generated byte rate as a fraction of the
        bandwidth. Returns 0.0 for unknown attributes and before the first
        tick, when no time has elapsed yet.
        """
        if what not in self.get_util_attrs():
            return 0.0
        elapsed_s = float(self.get_ticks()) / self.get_tick_rate()
        if elapsed_s == 0:
            return 0.0
        return (self.__byte_count / elapsed_s) / self.__bw
+
+# Consumer object.
class Consumer(SimComp):
    """
    Consumer Block:
    Consumes data at a constant rate
    """

    def __init__(self, sim_core, name, bw = float("inf"), latency = 0):
        """
        bw:      bandwidth used as the reference for utilization
        latency: latency (ticks) recorded as this block's hop
        """
        SimComp.__init__(self, sim_core, name, comptype.consumer)
        self.__byte_count = 0
        # item id -> HopDb of the most recent stream that carried the item
        self.__item_db = dict()
        self.__bw = bw
        self.__latency = latency
        self.__bound = False

    def inputs(self, i, bind=False):
        """
        Return the input endpoint (the consumer itself).
        With bind=True the input is marked as driven; binding it a second
        time raises RuntimeError. A call with bind=False leaves the binding
        state untouched.
        """
        if bind:
            if self.__bound:
                raise self.SimCompError('Input ' + str(i) + ' is already driven (bound).')
            self.__bound = True
        return self

    def connect(self, i, dest):
        """A consumer has no outputs: always raises RuntimeError."""
        raise self.SimCompError('This is a consumer block. Cannot connect to another block.')

    def is_ready(self):
        """A consumer never applies backpressure."""
        return True #TODO: Readiness can depend on bw and byte_count

    def push(self, data):
        """Consume stream `data`: record its path per item and count its bytes."""
        data.add_hop(self.name, self.__latency)
        for item in data.items:
            self.__item_db[item] = DataStream.HopDb(data.get_hops())
        self.__byte_count += data.get_bytes()

    def get_items(self):
        """List of the item ids received so far."""
        return list(self.__item_db.keys())

    def get_bytes(self):
        """Total number of bytes consumed so far."""
        return self.__byte_count

    def get_hops(self, item):
        """Locations on the path of the last stream that carried `item`."""
        return self.__item_db[item].get_hops()

    def get_latency(self, item, hop=None):
        """
        Return the latency in seconds of `item` up to `hop`
        (default: the last hop of its path).
        """
        if not hop:
            hop = self.get_hops(item)[-1]
        return self.__item_db[item].get_latency(self.get_ticks(), hop) / self.get_tick_rate()

    def get_util_attrs(self):
        """Names of the attributes for which utilization is reported."""
        return ['bandwidth']

    def get_utilization(self, what):
        """
        Return the average consumed byte rate as a fraction of the
        bandwidth. Returns 0.0 for unknown attributes and before the first
        tick, when no time has elapsed yet.
        """
        if what not in self.get_util_attrs():
            return 0.0
        elapsed_s = float(self.get_ticks()) / self.get_tick_rate()
        if elapsed_s == 0:
            return 0.0
        return (self.__byte_count / elapsed_s) / self.__bw
+
+# Channel
class Channel(SimComp):
    """
    A resource limited IO pipe:
    From the data stream perspective, this is a passthrough
    """

    def __init__(self, sim_core, name, bw = float("inf"), latency = 0, lossy = True):
        """
        bw:      bandwidth used as the reference for utilization
        latency: latency (ticks) recorded as this block's hop
        lossy:   if True, an unconnected channel accepts and drops data
        """
        SimComp.__init__(self, sim_core, name, comptype.channel)
        self.__bw = bw
        self.__latency = latency
        self.__lossy = lossy
        self.__dests = list()
        self.__byte_count = 0
        self.__bound = False

    def get_bytes(self):
        """Total number of bytes forwarded so far."""
        return self.__byte_count

    def inputs(self, i, bind=False):
        """
        Return the input endpoint (the channel itself); only input 0 exists.
        With bind=True the input is marked as driven; binding it a second
        time raises RuntimeError. A call with bind=False leaves the binding
        state untouched.
        """
        if (i != 0):
            raise self.SimCompError('An IO lane has only one input.')
        if bind:
            if self.__bound:
                raise self.SimCompError('Input ' + str(i) + ' is already driven (bound).')
            self.__bound = True
        return self

    def connect(self, i, dest):
        """Add dest to the destinations that receive every forwarded stream."""
        self.__dests.append(dest)

    def is_connected(self):
        """True if at least one destination is connected to the output."""
        return len(self.__dests) > 0

    def is_bound(self):
        """True if the input has been bound (is driven)."""
        return self.__bound

    def is_ready(self):
        """
        True if the channel can accept data: all destinations are ready, or
        the channel is lossy and unconnected (data is dropped). An
        unconnected lossless channel is never ready.
        """
        # If nothing is hooked up to a lossy lane, it will drop data
        if self.__lossy and not self.is_connected():
            return True
        return self.is_connected() and all(dest.is_ready() for dest in self.__dests)

    def push(self, data):
        """Record this hop on `data` and forward a copy to every destination."""
        # If nothing is hooked up to a lossy lane, it will drop data
        if self.__lossy and not self.is_connected():
            return
        data.add_hop(self.name, self.__latency)
        for dest in self.__dests:
            # Each destination extends the hop list independently
            dest.push(copy.deepcopy(data))
        self.__byte_count += data.get_bytes()

    def get_util_attrs(self):
        """Names of the attributes for which utilization is reported."""
        return ['bandwidth']

    def get_utilization(self, what):
        """
        Return the average forwarded byte rate as a fraction of the
        bandwidth. Returns 0.0 for unknown attributes and before the first
        tick, when no time has elapsed yet.
        """
        if what not in self.get_util_attrs():
            return 0.0
        elapsed_s = float(self.get_ticks()) / self.get_tick_rate()
        if elapsed_s == 0:
            return 0.0
        return (self.__byte_count / elapsed_s) / self.__bw
+
+# Function
class Function(SimComp):
    """
    A Function Component:
    A function block is something that does anything interesting with a data stream.
    A function can have multiple input and output streams.
    Subclasses provide do_func(arg_data_in), which receives the list of
    input streams and returns one output stream or an iterable of them.
    """

    class Arg:
        """Input argument of a function; holds at most one pending stream."""

        def __init__(self, num, base_func):
            self.__num = num
            self.__data = None
            self.__base_func = base_func
            self.__bound = False

        def get_num(self):
            """Position of this argument in the function's input list."""
            return self.__num

        def is_ready(self):
            """True if the function is ready and no stream is pending here."""
            return self.__base_func.is_ready() and not self.__data

        def push(self, data):
            """Store the stream and notify the owning function."""
            self.__data = data
            self.__base_func.notify(self.__num)

        def pop(self):
            """Remove and return the pending stream; RuntimeError if there is none."""
            if self.__data:
                data = self.__data
                self.__data = None
                return data
            else:
                raise RuntimeError('Nothing to pop.')

        def bind(self, bind):
            """Set the bound flag and return its previous value."""
            retval = self.__bound
            self.__bound = bind
            return retval

    # Latency (ticks) of the function itself and of each input/output argument
    Latencies = collections.namedtuple('Latencies', ['func','inarg','outarg'])

    def __init__(self, sim_core, name, num_in_args, num_out_args, ticks_per_exec = 1):
        """
        num_in_args:    number of input arguments
        num_out_args:   number of outputs (unconnected until connect() is called)
        ticks_per_exec: minimum number of ticks between two executions
        """
        SimComp.__init__(self, sim_core, name, comptype.function)
        self.__ticks_per_exec = ticks_per_exec
        self.__last_exec_ticks = 0
        self.__in_args = [Function.Arg(i, self) for i in range(num_in_args)]
        self.__dests = [None] * num_out_args
        self.__in_args_pushed = dict()
        # Input stream with the highest latency in the current execution;
        # parent of the streams made by create_outdata_stream()
        self.__max_latency_input = None
        # Resources required by this function to do its job in one tick
        self.__rsrcs = HwRsrcs()
        self.__latencies = self.Latencies(func=0, inarg=[0]*num_in_args, outarg=[0]*num_out_args)

    def get_rsrcs(self):
        """Return the resource container of this function."""
        return self.__rsrcs

    def update_rsrcs(self, rsrcs):
        """Replace the resource container of this function."""
        self.__rsrcs = rsrcs

    def update_latency(self, func, inarg=None, outarg=None):
        """
        Set the latencies. inarg / outarg are per-argument lists and default
        to all zeros.
        """
        self.__latencies = self.Latencies(
            func=func,
            inarg=inarg if inarg else [0]*len(self.__in_args),
            outarg=outarg if outarg else [0]*len(self.__dests))

    def inputs(self, i, bind=False):
        """
        Return input argument i. With bind=True the argument is marked as
        driven; binding it a second time raises RuntimeError.
        """
        if bind and self.__in_args[i].bind(True):
            raise self.SimCompError('Input argument ' + str(i) + ' is already driven (bound).')
        return self.__in_args[i]

    def connect(self, i, dest):
        """Connect output i to the input endpoint dest."""
        self.__dests[i] = dest

    def is_ready(self):
        """
        True if the function has outputs, all of them are ready and at least
        ticks_per_exec ticks have passed since the last execution.
        NOTE(review): an output that was never connected is still None here
        and makes this call fail with AttributeError.
        """
        ready = len(self.__dests) > 0
        for dest in self.__dests:
            ready = ready and dest.is_ready()
        exec_ready = (self.get_ticks() - self.__last_exec_ticks) >= self.__ticks_per_exec
        return ready and exec_ready

    def create_outdata_stream(self, bpi, items, count):
        """
        Create an output stream that inherits the hops of the highest-latency
        input of the current execution. Meant to be called from do_func().
        """
        return DataStream(
            bpi=bpi, items=items, count=count, parent=self.__max_latency_input)

    def notify(self, arg_i):
        """
        Called by input argument arg_i when a stream arrived. Once every
        input holds a stream, run do_func() and push its results to the outputs.
        """
        # collections.Iterable was removed in Python 3.10
        try:
            from collections.abc import Iterable
        except ImportError:     # Python 2
            from collections import Iterable
        self.__in_args_pushed[arg_i] = True
        # Wait for all input args to come in
        if sorted(self.__in_args_pushed.keys()) != list(range(len(self.__in_args))):
            return
        # Pop data out of each input arg and find the slowest input.
        # The first input is always a candidate so that the parent stream is
        # defined even when no input has a latency above zero.
        max_in_latency = None
        self.__max_latency_input = None
        arg_data_in = list()
        for arg in self.__in_args:
            d = arg.pop()
            arg_data_in.append(d)
            lat = DataStream.HopDb(d.get_hops()).get_latency(self.get_ticks())
            if max_in_latency is None or lat > max_in_latency:
                max_in_latency = lat
                self.__max_latency_input = d
        # Call the function
        arg_data_out = self.do_func(arg_data_in)
        if not isinstance(arg_data_out, Iterable):
            arg_data_out = [arg_data_out]
        # Update output args
        for (i, out_data) in enumerate(arg_data_out):
            out_data.add_hop(self.name,
                max(self.__latencies.inarg) + self.__latencies.func + self.__latencies.outarg[i])
            self.__dests[i].push(out_data)
        # Cleanup
        self.__last_exec_ticks = self.get_ticks()
        self.__in_args_pushed = dict()

    def get_util_attrs(self):
        """A function exposes no utilization attributes."""
        return []

    def get_utilization(self, what):
        """A function reports no utilization: always 0.0."""
        return 0.0
+
+#------------------------------------------------------------
+# Plotting Functions
+#------------------------------------------------------------
class Visualizer():
    """
    Text dumps and matplotlib/graphviz plots for the state of a simulation.
    Plot methods draw into the figure opened with new_figure(); without an
    open figure they create one and show it immediately.
    """

    def __init__(self, sim_core):
        self.__sim_core = sim_core
        self.__figure = None
        self.__fig_dims = None

    def __open_axes(self, grid_pos, title, ylabel):
        """
        Return (ax, show): the subplot at grid_pos of the current figure and
        whether the figure was created here and must be shown by the caller.
        """
        show = not self.__figure
        if show:
            self.new_figure()
        self.__figure.subplots_adjust(bottom=0.25)
        ax = self.__figure.add_subplot(*(self.__fig_dims + [grid_pos]))
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        return (ax, show)

    def show_network(self, engine='fdp'):
        """Render the network topology to a PNG with graphviz and open it."""
        dot = self.__sim_core.network_to_dot()
        dot.format = 'png'
        dot.engine = engine
        dot.render('/tmp/rfnoc_sim.dot', view=True, cleanup=True)

    def dump_consumed_streams(self, consumer_filt='.*'):
        """Print every stream received by the consumers matching consumer_filt, with its latency."""
        comps = self.__sim_core.list_components(comptype.consumer, consumer_filt)
        print('=================================================================')
        print('Streams Received by Consumers matching (%s) at Tick = %04d'%(consumer_filt,self.__sim_core.get_ticks()))
        print('=================================================================')
        for c in sorted(comps):
            comp = self.__sim_core.lookup(c)
            for s in sorted(comp.get_items()):
                print(' - %s: (%s) Latency = %gs'%(s,c,comp.get_latency(s)))
        print('=================================================================')

    def dump_debug_audit_log(self, ctype, name_filt='.*'):
        """
        Print the driven/used status of every component of type ctype that
        matches name_filt. Only channels are supported; other types raise
        NotImplementedError.
        """
        if ctype != comptype.channel:
            raise NotImplementedError('Component type not yet supported: ' + ctype)

        comps = self.__sim_core.list_components(ctype, name_filt)
        print('=================================================================')
        print('Debug Audit for all %s Components matching (%s)'%(ctype,name_filt))
        print('=================================================================')
        for c in sorted(comps):
            comp = self.__sim_core.lookup(c)
            status = 'Unknown'
            if comp.is_bound() and comp.is_connected():
                status = 'Good'
            elif comp.is_bound() and not comp.is_connected():
                status = 'WARNING (Driven but Unused)'
            elif not comp.is_bound() and comp.is_connected():
                status = 'WARNING (Used but Undriven)'
            else:
                status = 'Unused'
            print(' - %s: Status = %s'%(c,status))
        print('=================================================================')

    def new_figure(self, grid_dims=None, fignum=1, figsize=(16, 9), dpi=72):
        """
        Open a figure that subsequent plot calls draw into.
        grid_dims is the [rows, cols] subplot grid (default [1, 1]).
        """
        self.__figure = plt.figure(num=fignum, figsize=figsize, dpi=dpi)
        # Stored as a fresh list: it is concatenated with [grid_pos] later
        self.__fig_dims = [1, 1] if grid_dims is None else list(grid_dims)

    def show_figure(self):
        """Show the current figure and close it for further plot calls."""
        plt.show()
        self.__figure = None

    def plot_utilization(self, ctype, name_filt='.*', grid_pos=1):
        """
        Bar plot of the utilization (in percent) of every attribute reported
        by the components of type ctype that match name_filt.
        """
        colors = ['b','r','g','y']
        comps = self.__sim_core.list_components(ctype, name_filt)
        attrs = set()
        for c in comps:
            attrs |= set(self.__sim_core.lookup(c).get_util_attrs())
        attrs = sorted(list(attrs))

        title = 'Resource utilization for all %s\ncomponents matching \"%s\"' % \
            (ctype, name_filt)
        (ax, show) = self.__open_axes(grid_pos, title, 'Resource Utilization (%)')
        # Components that report no attributes leave nothing to draw
        # (and would make the bar width below divide by zero)
        if comps and attrs:
            ind = np.arange(len(comps))
            width = 0.95/len(attrs)
            rects = []
            ymax = 100
            for (i, attr) in enumerate(attrs):
                utilz = [self.__sim_core.lookup(c).get_utilization(attr) * 100 for c in comps]
                rects.append(ax.bar(ind + width*i, utilz, width, color=colors[i%len(colors)]))
                ymax = max(ymax, int(math.ceil(max(utilz) / 100.0)) * 100)
            ax.set_ylim([0,ymax])
            ax.set_yticks(list(range(0,ymax,10)))
            ax.set_xticks(ind + 0.5)
            ax.set_xticklabels(comps, rotation=90)
            ax.legend(rects, attrs)
            # Positional flag: the keyword was renamed from 'b' to 'visible'
            ax.grid(True, which='both', color='0.65',linestyle='--')
            ax.plot([0, len(comps)], [100, 100], "k--", linewidth=3.0)
        if show:
            self.show_figure()

    def plot_consumption_latency(self, stream_filt='.*', consumer_filt='.*', grid_pos=1):
        """
        Bar plot of the source-to-sink latency of every stream matching
        stream_filt that was received by a consumer matching consumer_filt.
        """
        streams = list()
        for c in sorted(self.__sim_core.list_components(comptype.consumer, consumer_filt)):
            for s in sorted(self.__sim_core.lookup(c).get_items()):
                if (re.match(stream_filt, s)):
                    streams.append((c, s, c + '/' + s))

        title = 'Latency of Maximal Path Terminating in\nStream(s) matching \"%s\"\n(Consumer Filter = \"%s\")' % \
            (stream_filt, consumer_filt)
        (ax, show) = self.__open_axes(grid_pos, title, 'Maximal Source-to-Sink Latency (s)')
        if streams:
            ind = np.arange(len(streams))
            latency = [self.__sim_core.lookup(c).get_latency(s) for (c, s, _) in streams]
            rects = [ax.bar(ind, latency, 1.0, color='b')]
            ax.set_xticks(ind + 0.5)
            ax.set_xticklabels([label for (_, _, label) in streams], rotation=90)
            attrs = ['latency']
            ax.legend(rects, attrs)
            ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.2e'))
            ax.grid(True, which='both', color='0.65',linestyle='--')
        if show:
            self.show_figure()

    def plot_path_latency(self, stream_id, consumer_filt = '.*', grid_pos=1):
        """
        Line plot of the latency accumulated at every hop of stream
        stream_id, for each consumer matching consumer_filt that received it.
        """
        path = []
        latencies = []
        for c in self.__sim_core.list_components(comptype.consumer, consumer_filt):
            comp = self.__sim_core.lookup(c)
            for s in comp.get_items():
                if (stream_id == s):
                    for h in comp.get_hops(s):
                        path.append(h)
                        latencies.append(comp.get_latency(s, h))
                    # Item ids are unique per consumer: go on to the next consumer
                    break

        title = 'Accumulated Latency per Hop for Stream \"%s\"\n(Consumer Filter = \"%s\")' % \
            (stream_id, consumer_filt)
        (ax, show) = self.__open_axes(grid_pos, title, 'Maximal Source-to-Sink Latency (s)')
        if path:
            ind = np.arange(len(path))
            ax.plot(ind, latencies, '--rs')
            ax.set_xticks(ind)
            ax.set_xticklabels(path, rotation=90)
            ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.2e'))
            ax.grid(True, which='both', color='0.65',linestyle='--')
        if show:
            self.show_figure()
diff --git a/fpga/usrp3/tools/utils/rfnoc-system-sim/sim_colosseum.py b/fpga/usrp3/tools/utils/rfnoc-system-sim/sim_colosseum.py
new file mode 100755
index 000000000..81ef6cbf9
--- /dev/null
+++ b/fpga/usrp3/tools/utils/rfnoc-system-sim/sim_colosseum.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Ettus Research
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import rfnocsim
+import ni_hw_models as hw
+import colosseum_models
+import argparse
+import re
+
+ def _parse_args():
+     """Parse and return the command line options of the simulation."""
+     parser = argparse.ArgumentParser(description='Simulate the Colosseum network')
+     parser.add_argument('--topology', type=str, default='flb', choices=['torus','flb'], help='Topology')
+     parser.add_argument('--domain', type=str, default='time', choices=['time','frequency'], help='Domain')
+     parser.add_argument('--fir_taps', type=int, default=4, help='FIR Filter Taps (Time domain only)')
+     parser.add_argument('--fir_dly_line', type=int, default=512, help='FIR Delay Line (Time domain only)')
+     parser.add_argument('--fft_size', type=int, default=512, help='FFT Size (Frequency domain only)')
+     parser.add_argument('--fft_overlap', type=int, default=256, help='FFT Overlap (Frequency domain only)')
+     parser.add_argument('--samp_rate', type=float, default=100e6, help='Radio Channel Sample Rate')
+     parser.add_argument('--coherence_rate', type=float, default=1000, help='Channel coefficient update rate')
+     return parser.parse_args()
+
+ def _build_app_settings(args):
+     """Build the application settings dictionary from the parsed options."""
+     app_settings = {
+         'domain': args.domain,
+         'samp_rate': args.samp_rate,
+         'coherence_rate': args.coherence_rate,
+     }
+     # Only the parameters of the selected processing domain are passed on.
+     if args.domain == 'frequency':
+         app_settings['fft_size'] = args.fft_size
+         app_settings['fft_overlap'] = args.fft_overlap
+     else:
+         app_settings['fir_taps'] = args.fir_taps
+         app_settings['fir_dly_line'] = args.fir_dly_line
+     return app_settings
+
+ def _validate(sim_core):
+     """Run the post-simulation sanity checks, printing a warning per anomaly."""
+     print('[INFO] Validating correctness...')
+     for u in sim_core.list_components(rfnocsim.comptype.hardware, 'USRP.*'):
+         sim_core.lookup(u).validate(0)
+     print('[INFO] Validating feasibility...')
+     for u in sim_core.list_components('', '.*'):
+         c = sim_core.lookup(u)
+         for a in c.get_util_attrs():
+             # Read the utilization once and reuse it in the message.
+             util = c.get_utilization(a)
+             if util > 1.0:
+                 print('[WARN] %s: %s overutilized by %.1f%%' % (u,a,(util-1)*100))
+     print('[INFO] Validating BEE7 FPGA image IO consistency...')
+     master_fpga = 'BEE7_000/FPGA_NE'
+     # Splits a component name into (owner path, serial link name).
+     ser_link = re.compile(r'(.+)/(SER_.*)')
+     master_stats = dict()
+     for u in sim_core.list_components('', master_fpga + '/.*SER_.*'):
+         m = ser_link.match(u)
+         if m is None:
+             # The component filter is looser than the split pattern; skip
+             # names that cannot be split instead of crashing on m.group().
+             continue
+         master_stats[m.group(2)] = sim_core.lookup(u).get_utilization('bandwidth')
+     # Every link with the same name is compared against the master FPGA.
+     for ln, expected in master_stats.items():
+         for u in sim_core.list_components('', '.*/' + ln):
+             if sim_core.lookup(u).get_utilization('bandwidth') != expected:
+                 m = ser_link.match(u)
+                 owner = m.group(1) if m else u
+                 print('[WARN] Data flowing over ' + ln + ' is probably different between ' + master_fpga + ' and ' + owner)
+
+ def _visualize(sim_core):
+     """Show the network graph followed by the utilization and latency plots."""
+     ct = rfnocsim.comptype
+     vis = rfnocsim.Visualizer(sim_core)
+     vis.show_network()
+     # One entry per figure; each figure is a single row with one
+     # utilization plot per (component type, name filter) pair.
+     utilization_figures = [
+         [(ct.hardware, 'BEE7.*'),
+          (ct.producer, 'USRP.*')],
+         [(ct.channel, 'BEE7_000.*FPGA_NW.*EXT.*'),
+          (ct.channel, 'BEE7_006.*FPGA_SE.*EXT.*')],
+         [(ct.channel, 'BEE7_010.*FPGA_NW.*SER_EW_.*'),
+          (ct.channel, 'BEE7_010.*FPGA_NW.*SER_NS_.*'),
+          (ct.channel, 'BEE7_010.*FPGA_NW.*SER_XX_.*')],
+         [(ct.channel, 'BEE7_000.*FPGA_NW.*EXT.*'),
+          (ct.channel, 'BEE7_001.*FPGA_NW.*EXT.*'),
+          (ct.channel, 'BEE7_002.*FPGA_NW.*EXT.*'),
+          (ct.channel, 'BEE7_003.*FPGA_NW.*EXT.*')],
+         [(ct.channel, 'BEE7_010.*FPGA_NW.*EXT.*'),
+          (ct.channel, 'BEE7_010.*FPGA_NE.*EXT.*'),
+          (ct.channel, 'BEE7_010.*FPGA_SW.*EXT.*'),
+          (ct.channel, 'BEE7_010.*FPGA_SE.*EXT.*')],
+     ]
+     for plots in utilization_figures:
+         vis.new_figure([1, len(plots)])
+         for pos, (ctype, filt) in enumerate(plots, 1):
+             vis.plot_utilization(ctype, filt, pos)
+         vis.show_figure()
+     vis.new_figure([1,2])
+     vis.plot_consumption_latency('.*','.*USRP_.*', 1)
+     vis.plot_path_latency('tx[(0)]', '.*', 2)
+     vis.show_figure()
+     vis.plot_utilization(ct.producer, '.*MGMT_HOST.*')
+
+ def main():
+     """
+     Simulate the Colosseum network and report on the result.
+
+     Parses the command line, instantiates the USRPs, BEE7 blades and
+     management hosts, wires them up in the requested topology, runs the
+     simulation, prints validation warnings and finally shows the plots.
+     """
+     # Arguments
+     args = _parse_args()
+
+     sim_core = rfnocsim.SimulatorCore(tick_rate=100e6)
+     NUM_USRPS = 128
+     NUM_HOSTS = 4
+     NUM_BLADES = 16
+     NUM_CHANS = NUM_USRPS * 2
+
+     # Build an application settings structure
+     app_settings = _build_app_settings(args)
+
+     print('[INFO] Instantiating hardware resources...')
+     # Create USRPs
+     usrps = [hw.UsrpX310(sim_core, index=i, app_settings=app_settings)
+              for i in range(NUM_USRPS)]
+     # Create BEE7s
+     bee7blades = [hw.Bee7Blade(sim_core, index=i) for i in range(NUM_BLADES)]
+     # Create Management Hosts
+     # Floor division keeps the coefficient count an integer on Python 3 too.
+     coeffs_per_host = pow(NUM_CHANS,2) // NUM_HOSTS
+     hosts = [hw.ManagementHostandSwitch(sim_core, index=i,
+                  num_coeffs=coeffs_per_host, switch_ports=16, app_settings=app_settings)
+              for i in range(NUM_HOSTS)]
+
+     # Build topology
+     print('[INFO] Building topology...')
+     if args.topology == 'torus':
+         colosseum_models.Topology_2D_4x4_Torus.connect(sim_core, usrps, bee7blades, hosts, app_settings)
+     elif args.topology == 'flb':
+         colosseum_models.Topology_3D_4x4_FLB.connect(sim_core, usrps, bee7blades, hosts, app_settings)
+     else:
+         raise RuntimeError('Invalid topology: ' + args.topology)
+
+     print('[INFO] Running simulation...')
+     sim_core.run(16e-9)
+
+     # Sanity checks
+     _validate(sim_core)
+
+     # Visualize various metrics
+     _visualize(sim_core)
+
+ if __name__ == '__main__':
+     main()