aboutsummaryrefslogtreecommitdiffstats
path: root/fpga/usrp3/lib/axi/axis_shift_register.v
blob: 4b3c9f4deaa4af7dd8123c22bc59236651bf3252 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
//
// Copyright 2018 Ettus Research, A National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Module: axis_shift_register
// Description:
//   This module implements a chain of flip-flops connected
//   using AXI-Stream. It can be used in the following ways:
//   * As an AXI-Stream shift register. The tready path is
//     combinatorial from the output to the input so backpressure
//     is immediate. The same behavior makes this module non-ideal
//     to actually break timing critical paths.
//   * An AXI-Stream wrapper module for a multi-cycle operation
//     with clock-enables. This can most commonly be used with DSP
//     operations like filters. Enable the sideband datapath to 
//     let the module handle handshaking while processing samples
//     outside it.
//
// Parameters:
//   - WIDTH: The bitwidth of a sample on the data bus.
//   - NSPC: The number of parallel samples per cycle to process. The
//       total width of the data bus will be WIDTH*NSPC.
//   - LATENCY: Number of stages in the shift register
//   - SIDEBAND_DATAPATH: If SIDEBAND_DATAPATH==1 then tdata is managed
//       outside this module and imported from s_sideband_data. 
//       If SIDEBAND_DATAPATH=0, then tdata is managed internally and 
//       the sideband signals are unused.
//       Useful when using this module to manage a DSP pipeline where the
//       data could be changing in each stage.
//   - GAPLESS: After the shift register has filled up, should gaps be
//       allowed? If set to 1, then if s_axis_tvalid goes low then the 
//       pipeline will stall and all bits in stage_stb will immediately go low 
//       to ensure all stages in the shift register have valid data.
//       NOTE: With GAPLESS=1 the final "LATENCY" samples cannot exit
//       the shift register, because the stages only advance when a
//       new input sample is transferred.
//   - PIPELINE: Which ports to pipeline? {NONE, IN, OUT, INOUT}
//
// Signals:
//   - s_axis_* : Input sample stream (AXI-Stream)
//   - m_axis_* : Output sample stream (AXI-Stream)
//   - stage_stb : Transfer strobe for each stage 
//   - stage_eop : Transfer end-of-packet out. bit[i] = stage[i]
//   - m_sideband_data : Sideband data out for external consumer
//   - m_sideband_keep : Sideband keep signal out for external consumer
//   - s_sideband_data : Sideband data in from external producer

module axis_shift_register #(
  parameter WIDTH             = 32,     // Bits per sample
  parameter NSPC              = 1,      // Samples per cycle (bus width is WIDTH*NSPC)
  parameter LATENCY           = 3,      // Number of stages. Expected to be >= 1 because
                                        // stage_stb/stage_eop are declared [LATENCY-1:0]
  parameter SIDEBAND_DATAPATH = 0,      // Bit 0 set: tdata is taken from s_sideband_data
                                        // Bit 0 clear: tdata is carried in the shift register
  parameter GAPLESS           = 0,      // 0: per-stage handshaking, otherwise: all stages
                                        // share a single clock-enable
  parameter PIPELINE          = "NONE"  // "NONE", "IN", "OUT" or "INOUT"
)(
  // Clock, reset and settings
  input  wire                     clk,              // Clock
  input  wire                     reset,            // Reset
  // Serial Data In (AXI-Stream)              
  input  wire [(WIDTH*NSPC)-1:0]  s_axis_tdata,     // Input stream tdata
  input  wire [NSPC-1:0]          s_axis_tkeep,     // Input stream tkeep (used as a sample qualifier)
  input  wire                     s_axis_tlast,     // Input stream tlast
  input  wire                     s_axis_tvalid,    // Input stream tvalid
  output wire                     s_axis_tready,    // Input stream tready
  // Serial Data Out (AXI-Stream)             
  output wire [(WIDTH*NSPC)-1:0]  m_axis_tdata,     // Output stream tdata
  output wire [NSPC-1:0]          m_axis_tkeep,     // Output stream tkeep (used as a sample qualifier)
  output wire                     m_axis_tlast,     // Output stream tlast
  output wire                     m_axis_tvalid,    // Output stream tvalid
  input  wire                     m_axis_tready,    // Output stream tready
  // Signals for the sideband data path                     
  output wire [LATENCY-1:0]       stage_stb,        // Transfer strobe out. bit[i] = stage[i]
  output wire [LATENCY-1:0]       stage_eop,        // Transfer end-of-packet out. bit[i] = stage[i]
  output wire [(WIDTH*NSPC)-1:0]  m_sideband_data,  // Sideband data out for external consumer
  output wire [NSPC-1:0]          m_sideband_keep,  // Sideband keep signal out for external consumer
  input  wire [(WIDTH*NSPC)-1:0]  s_sideband_data   // Sideband data in from external producer
);
  // Shift register width depends on whether the datapath is internal.
  // Word layout (MSB to LSB): {tlast, tkeep[NSPC-1:0], tdata[(WIDTH*NSPC)-1:0]}
  // The tdata field is only present when SIDEBAND_DATAPATH[0] == 0.
  localparam SHREG_WIDTH     = SIDEBAND_DATAPATH[0] ? (NSPC + 1) : ((WIDTH*NSPC) + NSPC + 1);
  localparam SHREG_TLAST_LOC = SHREG_WIDTH-1;
  localparam SHREG_TKEEP_HI  = SHREG_WIDTH-2;
  localparam SHREG_TKEEP_LO  = SHREG_WIDTH-NSPC-1;

  //----------------------------------------------
  // Pipeline Logic
  // (fifo_flop2 is used because it breaks timing
  //  path going both ways: valid and ready)
  //----------------------------------------------
  // i_* is the stream entering the shift register,
  // o_* is the stream leaving it.
  wire [(WIDTH*NSPC)-1:0] i_tdata,  o_tdata;
  wire [NSPC-1:0]         i_tkeep,  o_tkeep;
  wire                    i_tlast,  o_tlast;
  wire                    i_tvalid, o_tvalid;
  wire                    i_tready, o_tready;

  generate
    // Input pipeline register if requested
    if (PIPELINE == "IN" || PIPELINE == "INOUT") begin
      axi_fifo_flop2 #(.WIDTH((WIDTH*NSPC) + NSPC + 1)) in_pipe_i (
        .clk(clk), .reset(reset), .clear(1'b0),
        .i_tdata({s_axis_tlast, s_axis_tkeep, s_axis_tdata}),
        .i_tvalid(s_axis_tvalid), .i_tready(s_axis_tready),
        .o_tdata({i_tlast, i_tkeep, i_tdata}), .o_tvalid(i_tvalid), .o_tready(i_tready),
        .space(), .occupied()
      );
    end else begin
      // No input register: connect the slave port straight through
      assign {i_tlast, i_tkeep, i_tdata} = {s_axis_tlast, s_axis_tkeep, s_axis_tdata};
      assign i_tvalid = s_axis_tvalid;
      assign s_axis_tready = i_tready;
    end

    // Output pipeline register if requested
    if (PIPELINE == "OUT" || PIPELINE == "INOUT") begin
      axi_fifo_flop2 #(.WIDTH((WIDTH*NSPC) + NSPC + 1)) out_pipe_i (
        .clk(clk), .reset(reset), .clear(1'b0),
        .i_tdata({o_tlast, o_tkeep, o_tdata}), .i_tvalid(o_tvalid), .i_tready(o_tready),
        .o_tdata({m_axis_tlast, m_axis_tkeep, m_axis_tdata}),
        .o_tvalid(m_axis_tvalid), .o_tready(m_axis_tready),
        .space(), .occupied()
      );
    end else begin
      // No output register: connect the master port straight through
      assign {m_axis_tlast, m_axis_tkeep, m_axis_tdata} = {o_tlast, o_tkeep, o_tdata};
      assign m_axis_tvalid = o_tvalid;
      assign o_tready = m_axis_tready;
    end
  endgenerate

  // The sideband outputs expose the data/keep entering the shift register
  // (after the optional input pipeline register).
  assign m_sideband_data = i_tdata;
  assign m_sideband_keep = i_tkeep;

  //----------------------------------------------
  // Shift register stages
  //----------------------------------------------
  genvar i;
  generate
    if (GAPLESS == 0) begin
      // Individual stage wires. Index i is the input of stage i,
      // index i+1 is its output; index LATENCY is the chain output.
      wire [SHREG_WIDTH-1:0]  stg_tdata [0:LATENCY];
      wire                    stg_tvalid[0:LATENCY];
      wire                    stg_tready[0:LATENCY];
      // Shift register input
      assign stg_tdata[0] = SIDEBAND_DATAPATH[0] ? {i_tlast, i_tkeep} : {i_tlast, i_tkeep, i_tdata};
      assign stg_tvalid[0] = i_tvalid;
      assign i_tready = stg_tready[0];
      // Shift register output
      assign o_tlast = stg_tdata[LATENCY][SHREG_TLAST_LOC];
      assign o_tkeep = stg_tdata[LATENCY][SHREG_TKEEP_HI:SHREG_TKEEP_LO];
      // The tdata field only exists in the shift register word when the
      // datapath is internal. Select the source with a generate-if so that
      // the part-select is never elaborated on the narrower sideband word.
      if (SIDEBAND_DATAPATH[0]) begin: sideband_tdata
        assign o_tdata = s_sideband_data;
      end else begin: internal_tdata
        assign o_tdata = stg_tdata[LATENCY][(WIDTH*NSPC)-1:0];
      end
      assign o_tvalid = stg_tvalid[LATENCY];
      assign stg_tready[LATENCY] = o_tready;
  
      for (i = 0; i < LATENCY; i=i+1) begin: stages
        axi_fifo_flop #(.WIDTH(SHREG_WIDTH)) reg_i (
          .clk(clk), .reset(reset), .clear(1'b0),
          .i_tdata(stg_tdata[i  ]), .i_tvalid(stg_tvalid[i  ]), .i_tready(stg_tready[i  ]),
          .o_tdata(stg_tdata[i+1]), .o_tvalid(stg_tvalid[i+1]), .o_tready(stg_tready[i+1]),
          .occupied(), .space()
        );
        // A stage strobes when a word is transferred into it
        assign stage_stb[i] = stg_tvalid[i] & stg_tready[i];
        assign stage_eop[i] = stage_stb[i] & stg_tdata[i][SHREG_TLAST_LOC];
      end
    end else begin // if (GAPLESS == 0)
      wire [(WIDTH*NSPC)-1:0] o_tdata_fifo;
      wire [NSPC-1:0]         o_tkeep_fifo;
      wire                    o_tlast_fifo, o_tvalid_fifo, o_tready_fifo;

      // Shift register to hold valids
      reg  [LATENCY-1:0]     stage_valid = {LATENCY{1'b0}};
      // Shift register to hold data/last
      reg  [SHREG_WIDTH-1:0] stage_shreg[0:LATENCY-1];
      wire [SHREG_WIDTH-1:0] shreg_input = SIDEBAND_DATAPATH[0] ? {i_tlast, i_tkeep} : {i_tlast, i_tkeep, i_tdata};
      // All stages advance together, and only when an input word is transferred
      wire                   shreg_ce = i_tready & i_tvalid;

      // Word and valid bit presented to the input of each stage:
      // stage 0 sees the module input, stage i>0 sees the contents of stage i-1.
      wire [SHREG_WIDTH-1:0] stage_din[0:LATENCY-1];
      wire [LATENCY-1:0]     stage_vin;

      assign i_tready      = o_tready_fifo;
      // The last stage's contents leave the register in the same cycle that
      // a new word is shifted in, provided that last stage holds valid data.
      assign o_tvalid_fifo = stage_valid[LATENCY-1] & shreg_ce;
      assign o_tlast_fifo  = stage_shreg[LATENCY-1][SHREG_TLAST_LOC];
      assign o_tkeep_fifo  = stage_shreg[LATENCY-1][SHREG_TKEEP_HI:SHREG_TKEEP_LO];
      // See the GAPLESS == 0 branch: avoid elaborating a tdata part-select
      // on a word that has no tdata field.
      if (SIDEBAND_DATAPATH[0]) begin: sideband_tdata
        assign o_tdata_fifo = s_sideband_data;
      end else begin: internal_tdata
        assign o_tdata_fifo = stage_shreg[LATENCY-1][(WIDTH*NSPC)-1:0];
      end

      for (i = 0; i < LATENCY; i=i+1) begin: gapless_stages
        // Pick this stage's source with a generate-if so that index i-1 is
        // never elaborated for i == 0 (it would be a constant index of -1).
        if (i == 0) begin: first_stage
          assign stage_din[i] = shreg_input;
          assign stage_vin[i] = 1'b1;
        end else begin: later_stage
          assign stage_din[i] = stage_shreg[i-1];
          assign stage_vin[i] = stage_valid[i-1];
        end
        // Initialize shift register
        initial begin
          stage_shreg[i] <= {SHREG_WIDTH{1'b0}};
        end
        // Shift register logic
        always @(posedge clk) begin
          if (reset) begin
            stage_shreg[i] <= {SHREG_WIDTH{1'b0}};
            stage_valid[i] <= 1'b0;
          end else if (shreg_ce) begin
            stage_shreg[i] <= stage_din[i];
            stage_valid[i] <= stage_vin[i];
          end
        end
        // Outputs
        // (the MSB of stage_din[0] is i_tlast, so stage_eop[0] follows i_tlast)
        assign stage_stb[i] = stage_vin[i] & shreg_ce;
        assign stage_eop[i] = stage_stb[i] & stage_din[i][SHREG_TLAST_LOC];
      end

      // The "gapless" logic violates AXI-Stream by having an o_tready -> o_tvalid dependency, 
      // so we add a FIFO downstream to prevent deadlocks.
      axi_fifo #(.WIDTH((WIDTH*NSPC) + NSPC + 1), .SIZE($clog2(LATENCY))) out_fifo_i (
        .clk(clk), .reset(reset), .clear(1'b0),
        .i_tdata({o_tlast_fifo, o_tkeep_fifo, o_tdata_fifo}), .i_tvalid(o_tvalid_fifo), .i_tready(o_tready_fifo),
        .o_tdata({o_tlast, o_tkeep, o_tdata}), .o_tvalid(o_tvalid), .o_tready(o_tready),
        .space(), .occupied()
      );
    end
  endgenerate
endmodule // axis_shift_register