aboutsummaryrefslogtreecommitdiffstats
path: root/fpga/usrp3/lib/zpu/core
diff options
context:
space:
mode:
authorMartin Braun <martin.braun@ettus.com>2020-01-23 16:10:22 -0800
committerMartin Braun <martin.braun@ettus.com>2020-01-28 09:35:36 -0800
commitbafa9d95453387814ef25e6b6256ba8db2df612f (patch)
tree39ba24b5b67072d354775272e687796bb511848d /fpga/usrp3/lib/zpu/core
parent3075b981503002df3115d5f1d0b97d2619ba30f2 (diff)
downloaduhd-bafa9d95453387814ef25e6b6256ba8db2df612f.tar.gz
uhd-bafa9d95453387814ef25e6b6256ba8db2df612f.tar.bz2
uhd-bafa9d95453387814ef25e6b6256ba8db2df612f.zip
Merge FPGA repository back into UHD repository
The FPGA codebase was removed from the UHD repository in 2014 to reduce the size of the repository. However, over the last half-decade, the split between the repositories has proven more burdensome than it has been helpful. By merging the FPGA code back, it will be possible to create atomic commits that touch both FPGA and UHD codebases. Continuous integration testing is also simplified by merging the repositories, because it was previously difficult to automatically derive the correct UHD branch when testing a feature branch on the FPGA repository. This commit also updates the license files and paths therein. We are therefore merging the repositories again. Future development for FPGA code will happen in the same repository as the UHD host code and MPM code. == Original Codebase and Rebasing == The original FPGA repository will be hosted for the foreseeable future at its original local location: https://github.com/EttusResearch/fpga/ It can be used for bisecting, reference, and a more detailed history. The final commit from said repository to be merged here is 05003794e2da61cabf64dd278c45685a7abad7ec. This commit is tagged as v4.0.0.0-pre-uhd-merge. If you have changes in the FPGA repository that you want to rebase onto the UHD repository, simply run the following commands: - Create a directory to store patches (this should be an empty directory): mkdir ~/patches - Now make sure that your FPGA codebase is based on the same state as the code that was merged: cd src/fpga # Or wherever your FPGA code is stored git rebase v4.0.0.0-pre-uhd-merge Note: The rebase command may look slightly different depending on what exactly you're trying to rebase. - Create a patch set for your changes versus v4.0.0.0-pre-uhd-merge: git format-patch v4.0.0.0-pre-uhd-merge -o ~/patches Note: Make sure that only patches are stored in your output directory. It should otherwise be empty. Make sure that you picked the correct range of commits, and only commits you wanted to rebase were exported as patch files. - Go to the UHD repository and apply the patches: cd src/uhd # Or wherever your UHD repository is stored git am --directory fpga ~/patches/* rm -rf ~/patches # This is for cleanup == Contributors == The following people have contributed mainly to these files (this list is not complete): Co-authored-by: Alex Williams <alex.williams@ni.com> Co-authored-by: Andrej Rode <andrej.rode@ettus.com> Co-authored-by: Ashish Chaudhari <ashish@ettus.com> Co-authored-by: Ben Hilburn <ben.hilburn@ettus.com> Co-authored-by: Ciro Nishiguchi <ciro.nishiguchi@ni.com> Co-authored-by: Daniel Jepson <daniel.jepson@ni.com> Co-authored-by: Derek Kozel <derek.kozel@ettus.com> Co-authored-by: EJ Kreinar <ej@he360.com> Co-authored-by: Humberto Jimenez <humberto.jimenez@ni.com> Co-authored-by: Ian Buckley <ian.buckley@gmail.com> Co-authored-by: Jörg Hofrichter <joerg.hofrichter@ni.com> Co-authored-by: Jon Kiser <jon.kiser@ni.com> Co-authored-by: Josh Blum <josh@joshknows.com> Co-authored-by: Jonathon Pendlum <jonathan.pendlum@ettus.com> Co-authored-by: Martin Braun <martin.braun@ettus.com> Co-authored-by: Matt Ettus <matt@ettus.com> Co-authored-by: Michael West <michael.west@ettus.com> Co-authored-by: Moritz Fischer <moritz.fischer@ettus.com> Co-authored-by: Nick Foster <nick@ettus.com> Co-authored-by: Nicolas Cuervo <nicolas.cuervo@ettus.com> Co-authored-by: Paul Butler <paul.butler@ni.com> Co-authored-by: Paul David <paul.david@ettus.com> Co-authored-by: Ryan Marlow <ryan.marlow@ettus.com> Co-authored-by: Sugandha Gupta <sugandha.gupta@ettus.com> Co-authored-by: Sylvain Munaut <tnt@246tNt.com> Co-authored-by: Trung Tran <trung.tran@ettus.com> Co-authored-by: Vidush Vishwanath <vidush.vishwanath@ettus.com> Co-authored-by: Wade Fife <wade.fife@ettus.com>
Diffstat (limited to 'fpga/usrp3/lib/zpu/core')
-rw-r--r--fpga/usrp3/lib/zpu/core/zpu_config.vhd20
-rw-r--r--fpga/usrp3/lib/zpu/core/zpu_core.vhd948
-rw-r--r--fpga/usrp3/lib/zpu/core/zpupkg.vhd168
3 files changed, 1136 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/zpu/core/zpu_config.vhd b/fpga/usrp3/lib/zpu/core/zpu_config.vhd
new file mode 100644
index 000000000..560547223
--- /dev/null
+++ b/fpga/usrp3/lib/zpu/core/zpu_config.vhd
@@ -0,0 +1,20 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.std_logic_unsigned.all;
+
+package zpu_config is
+ -- generate trace output or not.
+ constant Generate_Trace : boolean := false;
+ constant wordPower : integer := 5;
+ -- during simulation, set this to '0' to get matching trace.txt
+ constant DontCareValue : std_logic := '0';
+ -- Clock frequency in MHz.
+ constant ZPU_Frequency : std_logic_vector(7 downto 0) := x"40";
+ -- This is the msb address bit. bytes=2^(maxAddrBitIncIO+1)
+ constant maxAddrBitIncIO : integer := 15;
+
+ -- start byte address of stack.
+ -- point to top of RAM - 2*words
+ constant spStart : std_logic_vector(maxAddrBitIncIO downto 0) := x"3ff8";
+
+end zpu_config;
diff --git a/fpga/usrp3/lib/zpu/core/zpu_core.vhd b/fpga/usrp3/lib/zpu/core/zpu_core.vhd
new file mode 100644
index 000000000..f32c76f6f
--- /dev/null
+++ b/fpga/usrp3/lib/zpu/core/zpu_core.vhd
@@ -0,0 +1,948 @@
+
+-- Company: ZPU4 generic memory interface CPU
+-- Engineer: yvind Harboe
+
+library IEEE;
+use IEEE.STD_LOGIC_1164.ALL;
+use IEEE.STD_LOGIC_UNSIGNED.ALL;
+use IEEE.STD_LOGIC_arith.ALL;
+
+library work;
+use work.zpu_config.all;
+use work.zpupkg.all;
+
+
+
+
+
+entity zpu_core is
+ Port ( clk : in std_logic;
+ areset : in std_logic;
+ enable : in std_logic;
+ mem_req : out std_logic;
+ mem_we : out std_logic;
+ mem_ack : in std_logic;
+ mem_read : in std_logic_vector(wordSize-1 downto 0);
+ mem_write : out std_logic_vector(wordSize-1 downto 0);
+ out_mem_addr : out std_logic_vector(maxAddrBitIncIO downto 0);
+ mem_writeMask: out std_logic_vector(wordBytes-1 downto 0);
+ interrupt : in std_logic;
+ break : out std_logic;
+ zpu_status : out std_logic_vector(63 downto 0));
+end zpu_core;
+
+architecture behave of zpu_core is
+
+type InsnType is
+(
+State_AddTop,
+State_Dup,
+State_DupStackB,
+State_Pop,
+State_Popdown,
+State_Add,
+State_Or,
+State_And,
+State_Store,
+State_AddSP,
+State_Shift,
+State_Nop,
+State_Im,
+State_LoadSP,
+State_StoreSP,
+State_Emulate,
+State_Load,
+State_PushPC,
+State_PushSP,
+State_PopPC,
+State_PopPCRel,
+State_Not,
+State_Flip,
+State_PopSP,
+State_Neqbranch,
+State_Eq,
+State_Loadb,
+State_Mult,
+State_Lessthan,
+State_Lessthanorequal,
+State_Ulessthanorequal,
+State_Ulessthan,
+State_Pushspadd,
+State_Call,
+State_Callpcrel,
+State_Sub,
+State_Break,
+State_Storeb,
+State_Interrupt,
+State_InsnFetch
+);
+
+type StateType is
+(
+State_Idle, -- using first state first on the list out of paranoia
+State_Load2,
+State_Popped,
+State_LoadSP2,
+State_LoadSP3,
+State_AddSP2,
+State_Fetch,
+State_Execute,
+State_Decode,
+State_Decode2,
+State_Resync,
+
+State_StoreSP2,
+State_Resync2,
+State_Resync3,
+State_Loadb2,
+State_Storeb2,
+State_Mult2,
+State_Mult3,
+State_Mult5,
+State_Mult6,
+State_Mult4,
+State_BinaryOpResult
+);
+
+
+signal pc : std_logic_vector(maxAddrBitIncIO downto 0);
+signal sp : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal incSp : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal incIncSp : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal decSp : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal stackA : std_logic_vector(wordSize-1 downto 0);
+signal binaryOpResult : std_logic_vector(wordSize-1 downto 0);
+signal multResult2 : std_logic_vector(wordSize-1 downto 0);
+signal multResult3 : std_logic_vector(wordSize-1 downto 0);
+signal multResult : std_logic_vector(wordSize-1 downto 0);
+signal multA : std_logic_vector(wordSize-1 downto 0);
+signal multB : std_logic_vector(wordSize-1 downto 0);
+signal stackB : std_logic_vector(wordSize-1 downto 0);
+signal idim_flag : std_logic;
+signal busy : std_logic;
+signal mem_readEnable : std_logic;
+signal mem_addr : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal mem_delayAddr : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal mem_delayReadEnable : std_logic;
+signal mem_busy : std_logic;
+signal decodeWord : std_logic_vector(wordSize-1 downto 0);
+
+
+signal state : StateType;
+signal insn : InsnType;
+type InsnArray is array(0 to wordBytes-1) of InsnType;
+signal decodedOpcode : InsnArray;
+
+type OpcodeArray is array(0 to wordBytes-1) of std_logic_vector(7 downto 0);
+
+signal opcode : OpcodeArray;
+
+
+
+
+signal begin_inst : std_logic;
+signal trace_opcode : std_logic_vector(7 downto 0);
+signal trace_pc : std_logic_vector(maxAddrBitIncIO downto 0);
+signal trace_sp : std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+signal trace_topOfStack : std_logic_vector(wordSize-1 downto 0);
+signal trace_topOfStackB : std_logic_vector(wordSize-1 downto 0);
+
+signal out_mem_req : std_logic;
+
+signal inInterrupt : std_logic;
+
+-- state machine.
+
+begin
+ mem_writeMask <= (others => '1');
+
+ zpu_status(maxAddrBitIncIO downto 0) <= trace_pc;
+ zpu_status(31) <= '1';
+ zpu_status(39 downto 32) <= trace_opcode;
+ zpu_status(40) <= '1' when (state = State_Idle) else '0';
+ zpu_status(62) <= '1';
+
+ traceFileGenerate:
+ if Generate_Trace generate
+ trace_file: trace port map (
+ clk => clk,
+ begin_inst => begin_inst,
+ pc => trace_pc,
+ opcode => trace_opcode,
+ sp => trace_sp,
+ memA => trace_topOfStack,
+ memB => trace_topOfStackB,
+ busy => busy,
+ intsp => (others => 'U')
+ );
+ end generate;
+
+
+ -- the memory subsystem will tell us one cycle later whether or
+ -- not it is busy
+ out_mem_addr(maxAddrBitIncIO downto minAddrBit) <= mem_addr;
+ out_mem_addr(minAddrBit-1 downto 0) <= (others => '0');
+ mem_req <= out_mem_req;
+
+ incSp <= sp + 1;
+ incIncSp <= sp + 2;
+ decSp <= sp - 1;
+
+ mem_busy <= out_mem_req and not mem_ack; -- '1' when the memory is busy
+
+ opcodeControl:
+ process(clk, areset)
+ variable tOpcode : std_logic_vector(OpCode_Size-1 downto 0);
+ variable spOffset : std_logic_vector(4 downto 0);
+ variable tSpOffset : std_logic_vector(4 downto 0);
+ variable nextPC : std_logic_vector(maxAddrBitIncIO downto 0);
+ variable tNextState : InsnType;
+ variable tDecodedOpcode : InsnArray;
+ variable tMultResult : std_logic_vector(wordSize*2-1 downto 0);
+ begin
+ if areset = '1' then
+ state <= State_Idle;
+ break <= '0';
+ sp <= spStart(maxAddrBitIncIO downto minAddrBit);
+
+ pc <= (others => '0');
+ idim_flag <= '0';
+ begin_inst <= '0';
+ mem_we <= '0';
+ multA <= (others => '0');
+ multB <= (others => '0');
+ out_mem_req <= '0';
+ mem_addr <= (others => DontCareValue);
+ mem_write <= (others => DontCareValue);
+ inInterrupt <= '0';
+ elsif (clk'event and clk = '1') then
+ -- we must multiply unconditionally to get pipelined multiplication
+ tMultResult := multA * multB;
+ multResult3 <= multResult2;
+ multResult2 <= multResult;
+ multResult <= tMultResult(wordSize-1 downto 0);
+
+
+ spOffset(4):=not opcode(conv_integer(pc(byteBits-1 downto 0)))(4);
+ spOffset(3 downto 0):=opcode(conv_integer(pc(byteBits-1 downto 0)))(3 downto 0);
+ nextPC := pc + 1;
+
+ -- prepare trace snapshot
+ trace_opcode <= opcode(conv_integer(pc(byteBits-1 downto 0)));
+ trace_pc <= pc;
+ trace_sp <= sp;
+ trace_topOfStack <= stackA;
+ trace_topOfStackB <= stackB;
+ begin_inst <= '0';
+
+ -- we terminate the requeset as soon as we get acknowledge
+ if mem_ack = '1' then
+ out_mem_req <= '0';
+ mem_we <= '0';
+ end if;
+
+ if interrupt='0' then
+ inInterrupt <= '0'; -- no longer in an interrupt
+ end if;
+
+ case state is
+ when State_Idle =>
+ if enable='1' then
+ state <= State_Resync;
+ end if;
+ -- Initial state of ZPU, fetch top of stack + first instruction
+ when State_Resync =>
+ if mem_busy='0' then
+ mem_addr <= sp;
+ out_mem_req <= '1';
+ state <= State_Resync2;
+ end if;
+ when State_Resync2 =>
+ if mem_busy='0' then
+ stackA <= mem_read;
+ mem_addr <= incSp;
+ out_mem_req <= '1';
+ state <= State_Resync3;
+ end if;
+ when State_Resync3 =>
+ if mem_busy='0' then
+ stackB <= mem_read;
+ mem_addr <= pc(maxAddrBitIncIO downto minAddrBit);
+ out_mem_req <= '1';
+ state <= State_Decode;
+ end if;
+ when State_Decode =>
+ if mem_busy='0' then
+ decodeWord <= mem_read;
+ state <= State_Decode2;
+ end if;
+ when State_Decode2 =>
+ -- decode 4 instructions in parallel
+ for i in 0 to wordBytes-1 loop
+ tOpcode := decodeWord((wordBytes-1-i+1)*8-1 downto (wordBytes-1-i)*8);
+
+ tSpOffset(4):=not tOpcode(4);
+ tSpOffset(3 downto 0):=tOpcode(3 downto 0);
+
+ opcode(i) <= tOpcode;
+ if (tOpcode(7 downto 7)=OpCode_Im) then
+ tNextState:=State_Im;
+ elsif (tOpcode(7 downto 5)=OpCode_StoreSP) then
+ if tSpOffset = 0 then
+ tNextState := State_Pop;
+ elsif tSpOffset=1 then
+ tNextState := State_PopDown;
+ else
+ tNextState :=State_StoreSP;
+ end if;
+ elsif (tOpcode(7 downto 5)=OpCode_LoadSP) then
+ if tSpOffset = 0 then
+ tNextState :=State_Dup;
+ elsif tSpOffset = 1 then
+ tNextState :=State_DupStackB;
+ else
+ tNextState :=State_LoadSP;
+ end if;
+ elsif (tOpcode(7 downto 5)=OpCode_Emulate) then
+ tNextState :=State_Emulate;
+ if tOpcode(5 downto 0)=OpCode_Neqbranch then
+ tNextState :=State_Neqbranch;
+ elsif tOpcode(5 downto 0)=OpCode_Eq then
+ tNextState :=State_Eq;
+ elsif tOpcode(5 downto 0)=OpCode_Lessthan then
+ tNextState :=State_Lessthan;
+ elsif tOpcode(5 downto 0)=OpCode_Lessthanorequal then
+ --tNextState :=State_Lessthanorequal;
+ elsif tOpcode(5 downto 0)=OpCode_Ulessthan then
+ tNextState :=State_Ulessthan;
+ elsif tOpcode(5 downto 0)=OpCode_Ulessthanorequal then
+ --tNextState :=State_Ulessthanorequal;
+ elsif tOpcode(5 downto 0)=OpCode_Loadb then
+ tNextState :=State_Loadb;
+ elsif tOpcode(5 downto 0)=OpCode_Mult then
+ tNextState :=State_Mult;
+ elsif tOpcode(5 downto 0)=OpCode_Storeb then
+ tNextState :=State_Storeb;
+ elsif tOpcode(5 downto 0)=OpCode_Pushspadd then
+ tNextState :=State_Pushspadd;
+ elsif tOpcode(5 downto 0)=OpCode_Callpcrel then
+ tNextState :=State_Callpcrel;
+ elsif tOpcode(5 downto 0)=OpCode_Call then
+ --tNextState :=State_Call;
+ elsif tOpcode(5 downto 0)=OpCode_Sub then
+ tNextState :=State_Sub;
+ elsif tOpcode(5 downto 0)=OpCode_PopPCRel then
+ --tNextState :=State_PopPCRel;
+ end if;
+ elsif (tOpcode(7 downto 4)=OpCode_AddSP) then
+ if tSpOffset = 0 then
+ tNextState := State_Shift;
+ elsif tSpOffset = 1 then
+ tNextState := State_AddTop;
+ else
+ tNextState :=State_AddSP;
+ end if;
+ else
+ case tOpcode(3 downto 0) is
+ when OpCode_Nop =>
+ tNextState :=State_Nop;
+ when OpCode_PushSP =>
+ tNextState :=State_PushSP;
+ when OpCode_PopPC =>
+ tNextState :=State_PopPC;
+ when OpCode_Add =>
+ tNextState :=State_Add;
+ when OpCode_Or =>
+ tNextState :=State_Or;
+ when OpCode_And =>
+ tNextState :=State_And;
+ when OpCode_Load =>
+ tNextState :=State_Load;
+ when OpCode_Not =>
+ tNextState :=State_Not;
+ when OpCode_Flip =>
+ tNextState :=State_Flip;
+ when OpCode_Store =>
+ tNextState :=State_Store;
+ when OpCode_PopSP =>
+ tNextState :=State_PopSP;
+ when others =>
+ tNextState := State_Break;
+
+ end case;
+ end if;
+ tDecodedOpcode(i) := tNextState;
+
+ end loop;
+
+ insn <= tDecodedOpcode(conv_integer(pc(byteBits-1 downto 0)));
+
+ -- once we wrap, we need to fetch
+ tDecodedOpcode(0) := State_InsnFetch;
+
+ decodedOpcode <= tDecodedOpcode;
+ state <= State_Execute;
+
+
+
+ -- Each instruction must:
+ --
+ -- 1. set idim_flag
+ -- 2. increase pc if applicable
+ -- 3. set next state if appliable
+ -- 4. do it's operation
+
+ when State_Execute =>
+ insn <= decodedOpcode(conv_integer(nextPC(byteBits-1 downto 0)));
+
+ case insn is
+ when State_InsnFetch =>
+ state <= State_Fetch;
+ when State_Im =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '1';
+ pc <= pc + 1;
+
+ if idim_flag='1' then
+ stackA(wordSize-1 downto 7) <= stackA(wordSize-8 downto 0);
+ stackA(6 downto 0) <= opcode(conv_integer(pc(byteBits-1 downto 0)))(6 downto 0);
+ else
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ stackB <= stackA;
+ sp <= decSp;
+ for i in wordSize-1 downto 7 loop
+ stackA(i) <= opcode(conv_integer(pc(byteBits-1 downto 0)))(6);
+ end loop;
+ stackA(6 downto 0) <= opcode(conv_integer(pc(byteBits-1 downto 0)))(6 downto 0);
+ end if;
+ else
+ insn <= insn;
+ end if;
+ when State_StoreSP =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_StoreSP2;
+
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= sp+spOffset;
+ mem_write <= stackA;
+ stackA <= stackB;
+ sp <= incSp;
+ else
+ insn <= insn;
+ end if;
+
+
+ when State_LoadSP =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_LoadSP2;
+
+ sp <= decSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ else
+ insn <= insn;
+ end if;
+ when State_Emulate =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ sp <= decSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ stackA <= (others => DontCareValue);
+ stackA(maxAddrBitIncIO downto 0) <= pc + 1;
+ stackB <= stackA;
+
+ -- The emulate address is:
+ -- 98 7654 3210
+ -- 0000 00aa aaa0 0000
+ pc <= (others => '0');
+ pc(9 downto 5) <= opcode(conv_integer(pc(byteBits-1 downto 0)))(4 downto 0);
+ state <= State_Fetch;
+ else
+ insn <= insn;
+ end if;
+ when State_Callpcrel =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ stackA <= (others => DontCareValue);
+ stackA(maxAddrBitIncIO downto 0) <= pc + 1;
+
+ pc <= pc + stackA(maxAddrBitIncIO downto 0);
+ state <= State_Fetch;
+ else
+ insn <= insn;
+ end if;
+ when State_Call =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ stackA <= (others => DontCareValue);
+ stackA(maxAddrBitIncIO downto 0) <= pc + 1;
+ pc <= stackA(maxAddrBitIncIO downto 0);
+ state <= State_Fetch;
+ else
+ insn <= insn;
+ end if;
+ when State_AddSP =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_AddSP2;
+
+ out_mem_req <= '1';
+ mem_addr <= sp+spOffset;
+ else
+ insn <= insn;
+ end if;
+ when State_PushSP =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ sp <= decSp;
+ stackA <= (others => '0');
+ stackA(maxAddrBitIncIO downto minAddrBit) <= sp;
+ stackB <= stackA;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ else
+ insn <= insn;
+ end if;
+ when State_PopPC =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= stackA(maxAddrBitIncIO downto 0);
+ sp <= incSp;
+
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ state <= State_Resync;
+ else
+ insn <= insn;
+ end if;
+ when State_PopPCRel =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= stackA(maxAddrBitIncIO downto 0) + pc;
+ sp <= incSp;
+
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ state <= State_Resync;
+ else
+ insn <= insn;
+ end if;
+ when State_Add =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ stackA <= stackA + stackB;
+
+ out_mem_req <= '1';
+ mem_addr <= incIncSp;
+ sp <= incSp;
+ state <= State_Popped;
+ else
+ insn <= insn;
+ end if;
+ when State_Sub =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ binaryOpResult <= stackB - stackA;
+ state <= State_BinaryOpResult;
+ when State_Pop =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ mem_addr <= incIncSp;
+ out_mem_req <= '1';
+ sp <= incSp;
+ stackA <= stackB;
+ state <= State_Popped;
+ else
+ insn <= insn;
+ end if;
+ when State_PopDown =>
+ if mem_busy='0' then
+ -- PopDown leaves top of stack unchanged
+ begin_inst <= '1';
+ idim_flag <= '0';
+ mem_addr <= incIncSp;
+ out_mem_req <= '1';
+ sp <= incSp;
+ state <= State_Popped;
+ else
+ insn <= insn;
+ end if;
+ when State_Or =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ stackA <= stackA or stackB;
+ out_mem_req <= '1';
+ mem_addr <= incIncSp;
+ sp <= incSp;
+ state <= State_Popped;
+ else
+ insn <= insn;
+ end if;
+ when State_And =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ stackA <= stackA and stackB;
+ out_mem_req <= '1';
+ mem_addr <= incIncSp;
+ sp <= incSp;
+ state <= State_Popped;
+ else
+ insn <= insn;
+ end if;
+ when State_Eq =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ binaryOpResult <= (others => '0');
+ if (stackA=stackB) then
+ binaryOpResult(0) <= '1';
+ end if;
+ state <= State_BinaryOpResult;
+ when State_Ulessthan =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ binaryOpResult <= (others => '0');
+ if (stackA<stackB) then
+ binaryOpResult(0) <= '1';
+ end if;
+ state <= State_BinaryOpResult;
+ when State_Ulessthanorequal =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ binaryOpResult <= (others => '0');
+ if (stackA<=stackB) then
+ binaryOpResult(0) <= '1';
+ end if;
+ state <= State_BinaryOpResult;
+ when State_Lessthan =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ binaryOpResult <= (others => '0');
+ if (signed(stackA)<signed(stackB)) then
+ binaryOpResult(0) <= '1';
+ end if;
+ state <= State_BinaryOpResult;
+ when State_Lessthanorequal =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ binaryOpResult <= (others => '0');
+ if (signed(stackA)<=signed(stackB)) then
+ binaryOpResult(0) <= '1';
+ end if;
+ state <= State_BinaryOpResult;
+ when State_Load =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_Load2;
+
+ mem_addr <= stackA(maxAddrBitIncIO downto minAddrBit);
+ out_mem_req <= '1';
+ else
+ insn <= insn;
+ end if;
+
+ when State_Dup =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ sp <= decSp;
+ stackB <= stackA;
+ mem_write <= stackB;
+ mem_addr <= incSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ else
+ insn <= insn;
+ end if;
+ when State_DupStackB =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ sp <= decSp;
+ stackA <= stackB;
+ stackB <= stackA;
+ mem_write <= stackB;
+ mem_addr <= incSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ else
+ insn <= insn;
+ end if;
+ when State_Store =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+ mem_addr <= stackA(maxAddrBitIncIO downto minAddrBit);
+ mem_write <= stackB;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ sp <= incIncSp;
+ state <= State_Resync;
+ else
+ insn <= insn;
+ end if;
+ when State_PopSP =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ mem_write <= stackB;
+ mem_addr <= incSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ sp <= stackA(maxAddrBitIncIO downto minAddrBit);
+ state <= State_Resync;
+ else
+ insn <= insn;
+ end if;
+ when State_Nop =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+ when State_Not =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ stackA <= not stackA;
+ when State_Flip =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ for i in 0 to wordSize-1 loop
+ stackA(i) <= stackA(wordSize-1-i);
+ end loop;
+ when State_AddTop =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ stackA <= stackA + stackB;
+ when State_Shift =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ stackA(wordSize-1 downto 1) <= stackA(wordSize-2 downto 0);
+ stackA(0) <= '0';
+ when State_Pushspadd =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+ pc <= pc + 1;
+
+ stackA <= (others => '0');
+ stackA(maxAddrBitIncIO downto minAddrBit) <= stackA(maxAddrBitIncIO-minAddrBit downto 0)+sp;
+ when State_Neqbranch =>
+ -- branches are almost always taken as they form loops
+ begin_inst <= '1';
+ idim_flag <= '0';
+ sp <= incIncSp;
+ if (stackB/=0) then
+ pc <= stackA(maxAddrBitIncIO downto 0) + pc;
+ else
+ pc <= pc + 1;
+ end if;
+ -- need to fetch stack again.
+ state <= State_Resync;
+ when State_Mult =>
+ begin_inst <= '1';
+ idim_flag <= '0';
+
+ multA <= stackA;
+ multB <= stackB;
+ state <= State_Mult2;
+ when State_Break =>
+ report "Break instruction encountered" severity failure;
+ break <= '1';
+
+ when State_Loadb =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_Loadb2;
+
+ mem_addr <= stackA(maxAddrBitIncIO downto minAddrBit);
+ out_mem_req <= '1';
+ else
+ insn <= insn;
+ end if;
+ when State_Storeb =>
+ if mem_busy='0' then
+ begin_inst <= '1';
+ idim_flag <= '0';
+ state <= State_Storeb2;
+
+ mem_addr <= stackA(maxAddrBitIncIO downto minAddrBit);
+ out_mem_req <= '1';
+ else
+ insn <= insn;
+ end if;
+
+ when others =>
+-- sp <= (others => DontCareValue);
+ report "Illegal instruction" severity failure;
+ break <= '1';
+ end case;
+
+
+ when State_StoreSP2 =>
+ if mem_busy='0' then
+ mem_addr <= incSp;
+ out_mem_req <= '1';
+ state <= State_Popped;
+ end if;
+ when State_LoadSP2 =>
+ if mem_busy='0' then
+ state <= State_LoadSP3;
+ out_mem_req <= '1';
+ mem_addr <= sp+spOffset+1;
+ end if;
+ when State_LoadSP3 =>
+ if mem_busy='0' then
+ pc <= pc + 1;
+ state <= State_Execute;
+ stackB <= stackA;
+ stackA <= mem_read;
+ end if;
+ when State_AddSP2 =>
+ if mem_busy='0' then
+ pc <= pc + 1;
+ state <= State_Execute;
+ stackA <= stackA + mem_read;
+ end if;
+ when State_Load2 =>
+ if mem_busy='0' then
+ stackA <= mem_read;
+ pc <= pc + 1;
+ state <= State_Execute;
+ end if;
+ when State_Loadb2 =>
+ if mem_busy='0' then
+ stackA <= (others => '0');
+ stackA(7 downto 0) <= mem_read(((wordBytes-1-conv_integer(stackA(byteBits-1 downto 0)))*8+7) downto (wordBytes-1-conv_integer(stackA(byteBits-1 downto 0)))*8);
+ pc <= pc + 1;
+ state <= State_Execute;
+ end if;
+ when State_Storeb2 =>
+ if mem_busy='0' then
+ mem_addr <= stackA(maxAddrBitIncIO downto minAddrBit);
+ mem_write <= mem_read;
+ mem_write(((wordBytes-1-conv_integer(stackA(byteBits-1 downto 0)))*8+7) downto (wordBytes-1-conv_integer(stackA(byteBits-1 downto 0)))*8) <= stackB(7 downto 0) ;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ pc <= pc + 1;
+ sp <= incIncSp;
+ state <= State_Resync;
+ end if;
+ when State_Fetch =>
+ if mem_busy='0' then
+ if interrupt='1' and inInterrupt='0' and idim_flag='0' then
+ -- We got an interrupt
+ inInterrupt <= '1';
+
+ sp <= decSp;
+ out_mem_req <= '1';
+ mem_we <= '1';
+ mem_addr <= incSp;
+ mem_write <= stackB;
+ stackA <= (others => DontCareValue);
+ stackA(maxAddrBitIncIO downto 0) <= pc;
+ stackB <= stackA;
+
+ pc <= conv_std_logic_vector(32, maxAddrBitIncIo+1); -- interrupt address
+
+ report "ZPU jumped to interrupt!" severity note;
+ else
+ mem_addr <= pc(maxAddrBitIncIO downto minAddrBit);
+ out_mem_req <= '1';
+ state <= State_Decode;
+ end if;
+ end if;
+ when State_Mult2 =>
+ state <= State_Mult3;
+ when State_Mult3 =>
+ state <= State_Mult4;
+ when State_Mult4 =>
+ state <= State_Mult5;
+ when State_Mult5 =>
+ stackA <= multResult3;
+ state <= State_Mult6;
+ when State_Mult6 =>
+ if mem_busy='0' then
+ out_mem_req <= '1';
+ mem_addr <= incIncSp;
+ sp <= incSp;
+ state <= State_Popped;
+ end if;
+ when State_BinaryOpResult =>
+ if mem_busy='0' then
+ -- NB!!!! we know that the memory isn't busy at this point!!!!
+ out_mem_req <= '1';
+ mem_addr <= incIncSp;
+ sp <= incSp;
+ stackA <= binaryOpResult;
+ state <= State_Popped;
+ end if;
+ when State_Popped =>
+ if mem_busy='0' then
+ pc <= pc + 1;
+ stackB <= mem_read;
+ state <= State_Execute;
+ end if;
+ when others =>
+-- sp <= (others => DontCareValue);
+ report "Illegal state" severity failure;
+ break <= '1';
+ end case;
+ end if;
+ end process;
+
+
+
+end behave;
diff --git a/fpga/usrp3/lib/zpu/core/zpupkg.vhd b/fpga/usrp3/lib/zpu/core/zpupkg.vhd
new file mode 100644
index 000000000..a7e6cf1d0
--- /dev/null
+++ b/fpga/usrp3/lib/zpu/core/zpupkg.vhd
@@ -0,0 +1,168 @@
+library IEEE;
+use IEEE.STD_LOGIC_1164.all;
+use IEEE.STD_LOGIC_ARITH.all;
+
+library work;
+use work.zpu_config.all;
+
+package zpupkg is
+
+ -- This bit is set for read/writes to IO
+ -- FIX!!! eventually this should be set to wordSize-1 so as to
+ -- to make the address of IO independent of amount of memory
+ -- reserved for CPU. Requires trivial tweaks in toolchain/runtime
+ -- libraries.
+
+ constant byteBits : integer := wordPower-3; -- # of bits in a word that addresses bytes
+ constant maxAddrBit : integer := maxAddrBitIncIO-1;
+ constant ioBit : integer := maxAddrBit+1;
+ constant wordSize : integer := 2**wordPower;
+ constant wordBytes : integer := wordSize/8;
+ constant minAddrBit : integer := byteBits;
+ -- configurable internal stack size. Probably going to be 16 after toolchain is done
+ constant stack_bits : integer := 5;
+ constant stack_size : integer := 2**stack_bits;
+
+ component dualport_ram is
+ port (clk : in std_logic;
+ memAWriteEnable : in std_logic;
+ memAAddr : in std_logic_vector(maxAddrBit downto minAddrBit);
+ memAWrite : in std_logic_vector(wordSize-1 downto 0);
+ memARead : out std_logic_vector(wordSize-1 downto 0);
+ memBWriteEnable : in std_logic;
+ memBAddr : in std_logic_vector(maxAddrBit downto minAddrBit);
+ memBWrite : in std_logic_vector(wordSize-1 downto 0);
+ memBRead : out std_logic_vector(wordSize-1 downto 0));
+ end component;
+
+ component dram is
+ port (clk : in std_logic;
+ areset : in std_logic;
+ mem_writeEnable : in std_logic;
+ mem_readEnable : in std_logic;
+ mem_addr : in std_logic_vector(maxAddrBit downto 0);
+ mem_write : in std_logic_vector(wordSize-1 downto 0);
+ mem_read : out std_logic_vector(wordSize-1 downto 0);
+ mem_busy : out std_logic;
+ mem_writeMask : in std_logic_vector(wordBytes-1 downto 0));
+ end component;
+
+
+ component trace is
+ port(
+ clk : in std_logic;
+ begin_inst : in std_logic;
+ pc : in std_logic_vector(maxAddrBitIncIO downto 0);
+ opcode : in std_logic_vector(7 downto 0);
+ sp : in std_logic_vector(maxAddrBitIncIO downto minAddrBit);
+ memA : in std_logic_vector(wordSize-1 downto 0);
+ memB : in std_logic_vector(wordSize-1 downto 0);
+ busy : in std_logic;
+ intSp : in std_logic_vector(stack_bits-1 downto 0)
+ );
+ end component;
+
+ component zpu_core is
+ port ( clk : in std_logic;
+ areset : in std_logic;
+ enable : in std_logic;
+ mem_req : out std_logic;
+ mem_we : out std_logic;
+ mem_ack : in std_logic;
+ mem_read : in std_logic_vector(wordSize-1 downto 0);
+ mem_write : out std_logic_vector(wordSize-1 downto 0);
+ out_mem_addr : out std_logic_vector(maxAddrBitIncIO downto 0);
+ mem_writeMask: out std_logic_vector(wordBytes-1 downto 0);
+ interrupt : in std_logic;
+ break : out std_logic;
+ zpu_status : out std_logic_vector(63 downto 0));
+ end component;
+
+
+
+ component timer is
+ port(
+ clk : in std_logic;
+ areset : in std_logic;
+ sample : in std_logic;
+ reset : in std_logic;
+ counter : out std_logic_vector(63 downto 0));
+ end component;
+
+ component zpuio is
+ port ( areset : in std_logic;
+ cpu_clk : in std_logic;
+ clk_status : in std_logic_vector(2 downto 0);
+ cpu_din : in std_logic_vector(15 downto 0);
+ cpu_a : in std_logic_vector(20 downto 0);
+ cpu_we : in std_logic_vector(1 downto 0);
+ cpu_re : in std_logic;
+ cpu_dout : inout std_logic_vector(15 downto 0));
+ end component;
+
+
+
+
+ -- opcode decode constants
+ constant OpCode_Im : std_logic_vector(7 downto 7) := "1";
+ constant OpCode_StoreSP : std_logic_vector(7 downto 5) := "010";
+ constant OpCode_LoadSP : std_logic_vector(7 downto 5) := "011";
+ constant OpCode_Emulate : std_logic_vector(7 downto 5) := "001";
+ constant OpCode_AddSP : std_logic_vector(7 downto 4) := "0001";
+ constant OpCode_Short : std_logic_vector(7 downto 4) := "0000";
+
+ constant OpCode_Break : std_logic_vector(3 downto 0) := "0000";
+ constant OpCode_Shiftleft: std_logic_vector(3 downto 0) := "0001";
+ constant OpCode_PushSP : std_logic_vector(3 downto 0) := "0010";
+ constant OpCode_PushInt : std_logic_vector(3 downto 0) := "0011";
+
+ constant OpCode_PopPC : std_logic_vector(3 downto 0) := "0100";
+ constant OpCode_Add : std_logic_vector(3 downto 0) := "0101";
+ constant OpCode_And : std_logic_vector(3 downto 0) := "0110";
+ constant OpCode_Or : std_logic_vector(3 downto 0) := "0111";
+
+ constant OpCode_Load : std_logic_vector(3 downto 0) := "1000";
+ constant OpCode_Not : std_logic_vector(3 downto 0) := "1001";
+ constant OpCode_Flip : std_logic_vector(3 downto 0) := "1010";
+ constant OpCode_Nop : std_logic_vector(3 downto 0) := "1011";
+
+ constant OpCode_Store : std_logic_vector(3 downto 0) := "1100";
+ constant OpCode_PopSP : std_logic_vector(3 downto 0) := "1101";
+ constant OpCode_Compare : std_logic_vector(3 downto 0) := "1110";
+ constant OpCode_PopInt : std_logic_vector(3 downto 0) := "1111";
+
+ constant OpCode_Lessthan : std_logic_vector(5 downto 0) := conv_std_logic_vector(36, 6);
+ constant OpCode_Lessthanorequal : std_logic_vector(5 downto 0) := conv_std_logic_vector(37, 6);
+ constant OpCode_Ulessthan : std_logic_vector(5 downto 0) := conv_std_logic_vector(38, 6);
+ constant OpCode_Ulessthanorequal : std_logic_vector(5 downto 0) := conv_std_logic_vector(39, 6);
+
+ constant OpCode_Swap : std_logic_vector(5 downto 0) := conv_std_logic_vector(40, 6);
+ constant OpCode_Mult : std_logic_vector(5 downto 0) := conv_std_logic_vector(41, 6);
+
+ constant OpCode_Lshiftright : std_logic_vector(5 downto 0) := conv_std_logic_vector(42, 6);
+ constant OpCode_Ashiftleft : std_logic_vector(5 downto 0) := conv_std_logic_vector(43, 6);
+ constant OpCode_Ashiftright : std_logic_vector(5 downto 0) := conv_std_logic_vector(44, 6);
+ constant OpCode_Call : std_logic_vector(5 downto 0) := conv_std_logic_vector(45, 6);
+
+ constant OpCode_Eq : std_logic_vector(5 downto 0) := conv_std_logic_vector(46, 6);
+ constant OpCode_Neq : std_logic_vector(5 downto 0) := conv_std_logic_vector(47, 6);
+
+ constant OpCode_Sub : std_logic_vector(5 downto 0) := conv_std_logic_vector(49, 6);
+ constant OpCode_Loadb : std_logic_vector(5 downto 0) := conv_std_logic_vector(51, 6);
+ constant OpCode_Storeb : std_logic_vector(5 downto 0) := conv_std_logic_vector(52, 6);
+
+ constant OpCode_Eqbranch : std_logic_vector(5 downto 0) := conv_std_logic_vector(55, 6);
+ constant OpCode_Neqbranch : std_logic_vector(5 downto 0) := conv_std_logic_vector(56, 6);
+ constant OpCode_Poppcrel : std_logic_vector(5 downto 0) := conv_std_logic_vector(57, 6);
+
+ constant OpCode_Pushspadd : std_logic_vector(5 downto 0) := conv_std_logic_vector(61, 6);
+ constant OpCode_Mult16x16 : std_logic_vector(5 downto 0) := conv_std_logic_vector(62, 6);
+ constant OpCode_Callpcrel : std_logic_vector(5 downto 0) := conv_std_logic_vector(63, 6);
+
+
+
+ constant OpCode_Size : integer := 8;
+
+
+
+end zpupkg;