1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
//
// Copyright 2012 Ettus Research LLC
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
// Description:
// Holds packets in a FIFO until they are complete. This allows buffering
// slowly-built packets so they don't clog up downstream logic. If o_tready
// is held high, this module guarantees that o_tvalid will not be deasserted
// until a full packet is transferred. This module can also optionally drop
// a packet if the i_terror bit is asserted along with i_tlast. This allows
// discarding packet, say, if a CRC check fails.
// NOTE:
// - The maximum size of a packet that can pass through this module is
// 2^SIZE lines. If a larger packet is sent, this module will lock up.
// - Assuming that upstream is valid and downstream is ready, the maximum
// in to out latency per packet is (2^SIZE + 2) clock cycles.
// 2^SIZE because this module gates a packet, 1 cycle for the RAM read and
// 1 more cycle for the output register. This is not guaranteed behavior though.
// - The USE_AS_BUFF parameter can be used to treat this packet gate as
// a multi-packet buffer. When USE_AS_BUFF=0, the max number of packets
// (regardless of size) that the module can store is 2. When USE_AS_BUFF=1,
// the entire storage of this module can be used to buffer packets but at
// the cost of some additional RAM. Beware the sequence of (big packet,
// small packet, small packet), as some outside buffering may be needed
// to handle this case if USE_AS_BUFF=0.
module axi_packet_gate #(
parameter WIDTH = 64, // Width of datapath
parameter SIZE = 10, // log2 of the buffer size (must be >= MTU of packet)
parameter USE_AS_BUFF = 0, // Allow the packet gate to be used as a buffer (uses more RAM)
parameter MIN_PKT_SIZE= 0 // log2 of minimum valid packet size (rounded down, used to reduce addr fifo size)
) (
input wire clk,
input wire reset,
input wire clear,
input wire [WIDTH-1:0] i_tdata,
input wire i_tlast,
input wire i_terror,
input wire i_tvalid,
output wire i_tready,
output reg [WIDTH-1:0] o_tdata = {WIDTH{1'b0}},
output reg o_tlast = 1'b0,
output reg o_tvalid = 1'b0,
input wire o_tready
);
localparam [SIZE-1:0] ADDR_ZERO = {SIZE{1'b0}};
localparam [SIZE-1:0] ADDR_ONE = {{(SIZE-1){1'b0}}, 1'b1};
// -------------------------------------------
// RAM block that will hold pkts
// -------------------------------------------
wire wr_en, rd_en;
wire [WIDTH:0] wr_data, rd_data;
reg [SIZE-1:0] wr_addr = ADDR_ZERO, rd_addr = ADDR_ZERO;
// Threshold to explicitly instantiate LUTRAM
localparam LUTRAM_THRESH = 5;
// We need to instantiate a simple dual-port RAM here so
// we use the ram_2port module with one read port and one
// write port and "NO-CHANGE" mode.
ram_2port #(
.DWIDTH (WIDTH+1), .AWIDTH(SIZE),
.RW_MODE("NO-CHANGE"), .OUT_REG(0),
.RAM_TYPE(SIZE <= LUTRAM_THRESH ? "LUTRAM" : "AUTOMATIC")
) ram_i (
.clka (clk), .ena(1'b1), .wea(wr_en),
.addra(wr_addr), .dia(wr_data), .doa(),
.clkb (clk), .enb(rd_en), .web(1'b0),
.addrb(rd_addr), .dib({WIDTH+1{1'b0}}), .dob(rd_data)
);
// FIFO empty/full logic. The condition for both
// empty and full is when rd_addr == wr_addr. However,
// it matters if we approach that case from the low side
// or the high side. So keep track of the almost empty/full
// state for determine if the next transaction will cause
// the FIFO to be truly empty or full.
reg ram_full = 1'b0, ram_empty = 1'b1;
wire almost_full = (wr_addr == rd_addr - ADDR_ONE);
wire almost_empty = (wr_addr == rd_addr + ADDR_ONE);
always @(posedge clk) begin
if (reset | clear) begin
ram_full <= 1'b0;
end else begin
if (almost_full) begin
if (wr_en & ~rd_en)
ram_full <= 1'b1;
end else begin
if (~wr_en & rd_en)
ram_full <= 1'b0;
end
end
end
always @(posedge clk) begin
if (reset | clear) begin
ram_empty <= 1'b1;
end else begin
if (almost_empty) begin
if (rd_en & ~wr_en)
ram_empty <= 1'b1;
end else begin
if (~rd_en & wr_en)
ram_empty <= 1'b0;
end
end
end
// -------------------------------------------
// Address FIFO
// -------------------------------------------
// The address FIFO will hold the write address
// for the last line in a non-errant packet
wire [SIZE-1:0] afifo_i_tdata, afifo_o_tdata, afifo_p_tdata;
wire afifo_i_tvalid, afifo_i_tready;
wire afifo_o_tvalid, afifo_o_tready;
wire afifo_p_tvalid, afifo_p_tready;
axi_fifo #(.WIDTH(SIZE), .SIZE(USE_AS_BUFF==1 ? SIZE-MIN_PKT_SIZE : 1)) addr_fifo_i (
.clk(clk), .reset(reset), .clear(clear),
.i_tdata(afifo_i_tdata), .i_tvalid(afifo_i_tvalid), .i_tready(afifo_i_tready),
.o_tdata(afifo_p_tdata), .o_tvalid(afifo_p_tvalid), .o_tready(afifo_p_tready),
.space(), .occupied()
);
axi_fifo #(.WIDTH(SIZE), .SIZE(1)) addr_fifo_pipe_i (
.clk(clk), .reset(reset), .clear(clear),
.i_tdata(afifo_p_tdata), .i_tvalid(afifo_p_tvalid), .i_tready(afifo_p_tready),
.o_tdata(afifo_o_tdata), .o_tvalid(afifo_o_tvalid), .o_tready(afifo_o_tready),
.space(), .occupied()
);
// -------------------------------------------
// Write state machine
// -------------------------------------------
reg [SIZE-1:0] wr_head_addr = ADDR_ZERO;
assign i_tready = ~ram_full & afifo_i_tready;
assign wr_en = i_tvalid & i_tready;
assign wr_data = {i_tlast, i_tdata};
always @(posedge clk) begin
if (reset | clear) begin
wr_addr <= ADDR_ZERO;
wr_head_addr <= ADDR_ZERO;
end else begin
if (wr_en) begin
if (i_tlast) begin
if (i_terror) begin
// Incoming packet had an error. Rewind the write
// pointer and pretend that a packet never came in.
wr_addr <= wr_head_addr;
end else begin
// Incoming packet had no error, advance wr_addr and
// wr_head_addr for the next packet.
wr_addr <= wr_addr + ADDR_ONE;
wr_head_addr <= wr_addr + ADDR_ONE;
end
end else begin
// Packet is still in progress, only update wr_addr
wr_addr <= wr_addr + ADDR_ONE;
end
end
end
end
// Push the write address to the address FIFO if
// - It is the last one in the packet
// - The packet has no errors
assign afifo_i_tdata = wr_addr;
assign afifo_i_tvalid = ~ram_full & i_tvalid & i_tlast & ~i_terror;
// -------------------------------------------
// Read state machine
// -------------------------------------------
reg rd_data_valid = 1'b0;
wire update_out_reg;
// Data can be read if there is a valid last address in the
// address FIFO (signifying the end of an input packet) and
// if there is data available in RAM
wire ready_to_read = (~ram_empty) & afifo_o_tvalid;
// Pop from address FIFO once we have see the end of the pkt
assign afifo_o_tready = rd_en & (afifo_o_tdata == rd_addr);
// Read from RAM if
// - A full packet has been written AND
// - Output data is not valid OR is currently being transferred
assign rd_en = ready_to_read & (update_out_reg | ~rd_data_valid);
always @(posedge clk) begin
if (reset | clear) begin
rd_data_valid <= 1'b0;
rd_addr <= ADDR_ZERO;
end else begin
if (update_out_reg | ~rd_data_valid) begin
// Output data is not valid OR is currently being transferred
if (ready_to_read) begin
rd_data_valid <= 1'b1;
rd_addr <= rd_addr + ADDR_ONE;
end else begin
rd_data_valid <= 1'b0; // Don't read
end
end
end
end
// Instantiate an output register to break critical paths starting
// at the RAM module. When ram_2port is inferred as BRAM, the tools
// should absorb this register into the BRAM block without using
// SLICE resources.
always @(posedge clk) begin
if (reset | clear) begin
o_tvalid <= 1'b0;
end else if (update_out_reg) begin
o_tvalid <= rd_data_valid;
{o_tlast, o_tdata} <= rd_data;
end
end
// Update the output reg only *after* the downstream
// block has consumed the current value
assign update_out_reg = o_tready | ~o_tvalid;
endmodule
|