From 36e0219beaf03961cd46a6c1647510a5eebd36a3 Mon Sep 17 00:00:00 2001 From: Matt Ettus Date: Mon, 14 Jun 2010 18:51:53 -0700 Subject: barely fails timing on gigE/10 and gigE/12, larger fail on udp/10, but all seem to work ok --- usrp2/control_lib/Makefile.srcs | 1 + usrp2/control_lib/ram_harvard.v | 69 ++++++ usrp2/control_lib/ram_loader.v | 460 ++++++++++++++++++++++------------------ 3 files changed, 318 insertions(+), 212 deletions(-) create mode 100644 usrp2/control_lib/ram_harvard.v (limited to 'usrp2/control_lib') diff --git a/usrp2/control_lib/Makefile.srcs b/usrp2/control_lib/Makefile.srcs index 5e2a96a53..bc8e4d5bc 100644 --- a/usrp2/control_lib/Makefile.srcs +++ b/usrp2/control_lib/Makefile.srcs @@ -20,6 +20,7 @@ mux8.v \ nsgpio.v \ ram_2port.v \ ram_harv_cache.v \ +ram_harvard.v \ ram_loader.v \ setting_reg.v \ settings_bus.v \ diff --git a/usrp2/control_lib/ram_harvard.v b/usrp2/control_lib/ram_harvard.v new file mode 100644 index 000000000..948f9b36f --- /dev/null +++ b/usrp2/control_lib/ram_harvard.v @@ -0,0 +1,69 @@ + + +// Dual ported, Harvard architecture, cached ram + +module ram_harvard + #(parameter AWIDTH=15, + parameter RAM_SIZE=16384, + parameter ICWIDTH=6, + parameter DCWIDTH=6) + + (input wb_clk_i, + input wb_rst_i, + // Firmware download port. + input [AWIDTH-1:0] ram_loader_adr_i, + input [31:0] ram_loader_dat_i, + input [3:0] ram_loader_sel_i, + input ram_loader_stb_i, + input ram_loader_we_i, + input ram_loader_done_i, + // Instruction fetch port. + input [AWIDTH-1:0] if_adr, + output [31:0] if_data, + // Data access port. + input [AWIDTH-1:0] dwb_adr_i, + input [31:0] dwb_dat_i, + output [31:0] dwb_dat_o, + input dwb_we_i, + output dwb_ack_o, + input dwb_stb_i, + input [3:0] dwb_sel_i, + + input flush_icache ); + + reg ack_d1; + reg stb_d1; + + dpram32 #(.AWIDTH(AWIDTH),.RAM_SIZE(RAM_SIZE)) + sys_ram + (.clk(wb_clk_i), + .adr1_i(ram_loader_done_i ? if_adr : ram_loader_adr_i), + .dat1_i(ram_loader_dat_i), + .dat1_o(if_data), + .we1_i(ram_loader_done_i ? 1'b0 : ram_loader_we_i), + .en1_i(ram_loader_done_i ? 1'b1 : ram_loader_stb_i), + //.sel1_i(ram_loader_done_i ? 4'hF : ram_loader_sel_i), + .sel1_i(ram_loader_sel_i), // Sel is only for writes anyway + .adr2_i(dwb_adr_i), + .dat2_i(dwb_dat_i), + .dat2_o(dwb_dat_o), + .we2_i(dwb_we_i), + .en2_i(dwb_stb_i), + .sel2_i(dwb_sel_i) + ); + + assign dwb_ack_o = dwb_stb_i & (dwb_we_i | (stb_d1 & ~ack_d1)); + + always @(posedge wb_clk_i) + if(wb_rst_i) + ack_d1 <= 1'b0; + else + ack_d1 <= dwb_ack_o; + + always @(posedge wb_clk_i) + if(wb_rst_i) + stb_d1 <= 0; + else + stb_d1 <= dwb_stb_i; + +endmodule // ram_harvard diff --git a/usrp2/control_lib/ram_loader.v b/usrp2/control_lib/ram_loader.v index cb67de739..c53ea7aa7 100644 --- a/usrp2/control_lib/ram_loader.v +++ b/usrp2/control_lib/ram_loader.v @@ -1,225 +1,261 @@ +module ram_loader + #(parameter AWIDTH=16, RAM_SIZE=16384) + ( + // Wishbone I/F and clock domain + input wb_clk, + input dsp_clk, + input ram_loader_rst, + output wire [31:0] wb_dat, + output wire [AWIDTH-1:0] wb_adr, + output wb_stb, + output reg [3:0] wb_sel, + output wb_we, + output reg ram_loader_done, + // CPLD signals and clock domain + input cpld_clk, + input cpld_din, + output reg cpld_start, + output reg cpld_mode, + output reg cpld_done, + input cpld_detached + ); -// Adapted from VHDL code in spi_boot by Arnim Legauer -// Added a full wishbone master interface (32-bit) - -module ram_loader #(parameter AWIDTH=16, RAM_SIZE=16384) - (input clk_i, input rst_i, - // CPLD Interface - input cfg_clk_i, input cfg_data_i, - output start_o, output mode_o, output done_o, - input detached_i, - // Wishbone interface - output wire [31:0] wb_dat_o, - output reg [AWIDTH-1:0] wb_adr_o, - output wb_stb_o, - output wb_cyc_o, - output reg [3:0] wb_sel_o, - output reg wb_we_o, - input wb_ack_i, - output ram_loader_done_o); + localparam S0 = 0; + localparam S1 = 1; + localparam S2 = 2; + localparam S3 = 3; + localparam S4 = 4; + localparam S5 = 5; + localparam S6 = 6; + localparam RESET = 7; - // FSM to control start signal, clocked on main clock - localparam FSM1_WAIT_DETACH = 2'b00; - localparam FSM1_CHECK_NO_DONE = 2'b01; - localparam FSM1_WAIT_DONE = 2'b10; - - reg [1:0] start_fsm_q, start_fsm_s; - reg start_q, enable_q, start_s, enable_s; - reg done_q, done_s; + localparam WB_IDLE = 0; + localparam WB_WRITE = 1; + + + reg [AWIDTH+2:0] count; // 3 LSB's count bits in, the MSB's generate the Wishbone address + reg [6:0] shift_reg; + reg [7:0] data_reg; + reg sampled_clk; + reg sampled_clk_meta; + reg sampled_din; + reg inc_count; + reg load_data_reg; + reg shift; + reg wb_state, wb_next_state; + reg [2:0] state, next_state; + + // + // CPLD clock doesn't free run and is approximately 12.5MHz. + // Use 50MHz Wishbone clock to sample it as a signal and avoid having + // an extra clock domain for no reason. + // + + always @(posedge dsp_clk or posedge ram_loader_rst) + if (ram_loader_rst) + begin + sampled_clk_meta <= 1'b0; + sampled_clk <= 1'b0; + sampled_din <= 1'b0; + count <= 'h7FFF8; // Initialize so that address will be 0 when first byte fully received. + data_reg <= 0; + shift_reg <= 0; + end + else + begin + sampled_clk_meta <= cpld_clk; + sampled_clk <= sampled_clk_meta; + sampled_din <= cpld_din; + if (inc_count) + count <= count + 1'b1; + if (load_data_reg) + data_reg <= {shift_reg,sampled_din}; + if (shift) + shift_reg <= {shift_reg[5:0],sampled_din}; + end // else: !if(ram_loader_rst) - always @(posedge clk_i or posedge rst_i) - if(rst_i) - begin - start_fsm_q <= FSM1_WAIT_DETACH; - start_q <= 1'b0; - enable_q <= 1'b0; - end + + always @(posedge dsp_clk or posedge ram_loader_rst) + if (ram_loader_rst) + state <= RESET; else - begin - start_fsm_q <= start_fsm_s; - enable_q <= enable_s; - start_q <= start_s; - end // else: !if(rst_i) - + state <= next_state; + + always @* - case(start_fsm_q) - FSM1_WAIT_DETACH: - if(detached_i == 1'b1) - begin - start_fsm_s <= FSM1_CHECK_NO_DONE; - enable_s <= 1'b1; - start_s <= 1'b1; - end - else - begin - start_fsm_s <= FSM1_WAIT_DETACH; - enable_s <= enable_q; - start_s <= start_q; - end // else: !if(detached_i == 1'b1) - FSM1_CHECK_NO_DONE: - if(~done_q) - begin - start_fsm_s <= FSM1_WAIT_DONE; - enable_s <= enable_q; - start_s <= start_q; - end - else - begin - start_fsm_s <= FSM1_CHECK_NO_DONE; - enable_s <= enable_q; - start_s <= start_q; - end // else: !if(~done_q) - FSM1_WAIT_DONE: - if(done_q) - begin - start_fsm_s <= FSM1_WAIT_DETACH; - enable_s <= 1'b0; - start_s <= 1'b0; - end - else - begin - start_fsm_s <= FSM1_WAIT_DONE; - enable_s <= enable_q; - start_s <= start_q; - end // else: !if(done_q) - default: - begin - start_fsm_s <= FSM1_WAIT_DETACH; - enable_s <= enable_q; - start_s <= start_q; - end // else: !if(done_q) - endcase // case(start_fsm_q) - - // FSM running on data clock - - localparam FSM2_IDLE = 3'b000; - localparam FSM2_WE_ON = 3'b001; - localparam FSM2_WE_OFF = 3'b010; - localparam FSM2_INC_ADDR1 = 3'b011; - localparam FSM2_INC_ADDR2 = 3'b100; - localparam FSM2_FINISHED = 3'b101; - - reg [AWIDTH-1:0] addr_q; - reg [7:0] shift_dat_q, ser_dat_q; - reg [2:0] bit_q, fsm_q, fsm_s; - reg bit_ovfl_q, ram_we_s, ram_we_q, mode_q, mode_s, inc_addr_s; - - always @(posedge cfg_clk_i or posedge rst_i) - if(rst_i) - begin - addr_q <= 0; - shift_dat_q <= 8'd0; - ser_dat_q <= 8'd0; - bit_q <= 3'd0; - bit_ovfl_q <= 1'b0; - fsm_q <= FSM2_IDLE; - ram_we_q <= 1'b0; - done_q <= 1'b0; - mode_q <= 1'b0; - end + begin + // Defaults + next_state = state; + cpld_start = 1'b0; + shift = 1'b0; + inc_count = 0; + load_data_reg = 1'b0; + ram_loader_done = 1'b0; + cpld_mode = 1'b0; + cpld_done = 1'b1; + + + + case (state) //synthesis parallel_case full_case + // After reset wait until CPLD indicates its detached. + RESET: begin + if (cpld_detached) + next_state = S0; + else + next_state = RESET; + end + + // Assert cpld_start to signal the CPLD its to start sending serial clock and data. + // Assume cpld_clk is low as we transition into search for first rising edge + S0: begin + cpld_start = 1'b1; + cpld_done = 1'b0; + if (~cpld_detached) + next_state = S2; + else + next_state = S0; + end + + // + S1: begin + cpld_start = 1'b1; + cpld_done = 1'b0; + if (sampled_clk) + begin + // Found rising edge on cpld_clk. + if (count[2:0] == 3'b111) + // Its the last bit of a byte, send it out to the Wishbone bus. + begin + load_data_reg = 1'b1; + inc_count = 1'b1; + end + else + // Shift databit into LSB of shift register and increment count + begin + shift = 1'b1; + inc_count = 1'b1; + end // else: !if(count[2:0] == 3'b111) + next_state = S2; + end // if (sampled_clk) + else + next_state = S1; + end // case: S1 + + // + S2: begin + cpld_start = 1'b1; + cpld_done = 1'b0; + if (~sampled_clk) + // Found negative edge of clock + if (count[AWIDTH+2:3] == RAM_SIZE-1) // NOTE need to change this constant + // All firmware now downloaded + next_state = S3; + else + next_state = S1; + else + next_state = S2; + end // case: S2 + + // Now that terminal count is reached and all firmware is downloaded signal CPLD that download is done + // and that mode is now SPI mode. + S3: begin + if (sampled_clk) + begin + cpld_mode = 1'b1; + cpld_done = 1'b1; + next_state = S4; + end + else + next_state = S3; + end + + // Search for negedge of cpld_clk whilst keeping done sequence asserted. + // Keep done assserted + S4: begin + cpld_mode = 1'b1; + cpld_done = 1'b1; + if (~sampled_clk) + next_state = S5; + else + next_state = S4; + end + + // Search for posedge of cpld_clk whilst keeping done sequence asserted. + S5: begin + cpld_mode = 1'b1; + cpld_done = 1'b1; + if (sampled_clk) + next_state = S6; + else + next_state = S5; + end + + // Stay in this state until reset/power down + S6: begin + ram_loader_done = 1'b1; + cpld_done = 1'b1; + cpld_mode = 1'b1; + next_state = S6; + end + + endcase // case(state) + end + + always @(posedge dsp_clk or posedge ram_loader_rst) + if (ram_loader_rst) + wb_state <= WB_IDLE; else - begin - if(inc_addr_s) - addr_q <= addr_q + 1; - if(enable_q) - begin - bit_q <= bit_q + 1; - bit_ovfl_q <= (bit_q == 3'd7); - shift_dat_q[0] <= cfg_data_i; - shift_dat_q[7:1] <= shift_dat_q[6:0]; - end - if(bit_ovfl_q) - ser_dat_q <= shift_dat_q; - - fsm_q <= fsm_s; - - ram_we_q <= ram_we_s; - - if(done_s) - done_q <= 1'b1; - mode_q <= mode_s; - end // else: !if(rst_i) + wb_state <= wb_next_state; + reg do_write; + wire empty, full; + always @* begin - inc_addr_s <= 1'b0; - ram_we_s <= 1'b0; - done_s <= 1'b0; - fsm_s <= FSM2_IDLE; - mode_s <= 1'b0; - - case(fsm_q) - FSM2_IDLE : - if(start_q) - if(bit_ovfl_q) - fsm_s <= FSM2_WE_ON; - FSM2_WE_ON: - begin - ram_we_s <= 1'b1; - fsm_s <= FSM2_WE_OFF; - end - FSM2_WE_OFF: - begin - ram_we_s <= 1'b1; - fsm_s <= FSM2_INC_ADDR1; - end - FSM2_INC_ADDR1: - fsm_s <= FSM2_INC_ADDR2; - FSM2_INC_ADDR2: - if(addr_q == (RAM_SIZE-1)) - //if(&addr_q) - begin - fsm_s <= FSM2_FINISHED; - done_s <= 1'b1; - mode_s <= 1'b1; - end - else - begin - inc_addr_s <= 1'b1; - fsm_s <= FSM2_IDLE; - end // else: !if(&addr_q) - FSM2_FINISHED: - begin - fsm_s <= FSM2_FINISHED; - mode_s <= 1'b1; - end - endcase // case(fsm_q) + wb_next_state = wb_state; + do_write = 1'b0; + + case (wb_state) //synthesis full_case parallel_case + // + WB_IDLE: begin + if (load_data_reg) + // Data reg will load ready to write wishbone @ next clock edge + wb_next_state = WB_WRITE; + else + wb_next_state = WB_IDLE; + end + + // Drive address and data onto wishbone. + WB_WRITE: begin + do_write = 1'b1; + if (~full) + wb_next_state = WB_IDLE; + else + wb_next_state = WB_WRITE; + end + + endcase // case(wb_state) end // always @ * - assign start_o = start_q; - assign mode_o = mode_q; - assign done_o = start_q ? done_q : 1'b1; - wire [AWIDTH-1:0] ram_addr = addr_q; - wire [7:0] ram_data = ser_dat_q; - assign ram_loader_done_o = (fsm_q == FSM2_FINISHED); - - // wishbone master, only writes - reg [7:0] dat_holder; - assign wb_dat_o = {4{dat_holder}}; - assign wb_stb_o = wb_we_o; - assign wb_cyc_o = wb_we_o; + wire [1:0] count_out; + wire [7:0] data_out; + + fifo_xlnx_16x40_2clk crossclk + (.rst(ram_loader_rst), + .wr_clk(dsp_clk), .din({count[4:3],count[AWIDTH+2:3],data_reg}), .wr_en(do_write), .full(full), + .rd_clk(wb_clk), .dout({count_out,wb_adr,data_out}), .rd_en(~empty), .empty(empty)); + + assign wb_dat = {4{data_out}}; + + always @* + case(count_out[1:0]) //synthesis parallel_case full_case + 2'b00 : wb_sel = 4'b1000; + 2'b01 : wb_sel = 4'b0100; + 2'b10 : wb_sel = 4'b0010; + 2'b11 : wb_sel = 4'b0001; + endcase + + assign wb_we = ~empty; + assign wb_stb = ~empty; - always @(posedge clk_i or posedge rst_i) - if(rst_i) - begin - dat_holder <= 8'd0; - wb_adr_o <= 0; - wb_sel_o <= 4'b0000; - wb_we_o <= 1'b0; - end - else if(ram_we_q) - begin - dat_holder <= ram_data; - wb_adr_o <= ram_addr; - wb_we_o <= 1'b1; - case(ram_addr[1:0]) // Big Endian - 2'b00 : wb_sel_o <= 4'b1000; - 2'b01 : wb_sel_o <= 4'b0100; - 2'b10 : wb_sel_o <= 4'b0010; - 2'b11 : wb_sel_o <= 4'b0001; - endcase // case(ram_addr[1:0]) - end // if (ram_we_q) - else if(wb_ack_i) - wb_we_o <= 1'b0; - endmodule // ram_loader -- cgit v1.2.3