--------------------------------------------------------------------------------------------------------
--
-- This implementation multiplies two 8x8 matrices of 8-bit values (A and B) and stores the result
-- in a third 8x8 matrix of 16-bit values (C). All of the major components are instantiated as
-- IP cores: two 64x8 BRAMs, one 64x16 BRAM, an 8-bit multiplier (soft implementation) and
-- ChipScope Pro. ChipScope Pro is used to output and verify the results of the multiplication.
--
-- Operation overview (all timing is expressed in values of `counter`, which counts clock cycles
-- while the button is held and is cleared when it is released):
--   * `state` is a 3-bit counter running in lock-step with `counter`; it selects which of the
--     8 element pairs of the current row/column is being fetched.
--   * `row_counter`/`col_counter` select the C cell being computed; `col_counter` advances every
--     time `state` is 7 and `row_counter` advances when `col_counter` wraps.
--   * Products are accumulated in `partial_sum`; each time `state` is 3 the finished sum is
--     latched into `C_din` together with the address of the cell it belongs to, and it is
--     written into the C BRAM by the write-enable pulse generated when `state` is 0.
--   * After the compute phase `C_addr` is swept over all 64 cells so that `C_dout` can be
--     captured with ChipScope.
--
--------------------------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;

-- Matrix multiplier entity definition
entity multiplier is
    Port ( clk    : in  STD_LOGIC;   -- Incoming clock
           switch : in  std_logic;   -- South Button on the S3 Starter Kit
           led    : out STD_LOGIC);  -- LED 0 on the S3 Starter Kit
end multiplier;

architecture Behavioral of multiplier is

    -- Generated Block RAM core, organized as 64 x 8 bit. Its contents are initialized to
    -- predetermined values stored in bram.coe.
    component BRAM
        port (
            addr : IN  std_logic_VECTOR(5 downto 0);
            clk  : IN  std_logic;
            din  : IN  std_logic_VECTOR(7 downto 0);
            dout : OUT std_logic_VECTOR(7 downto 0);
            we   : IN  std_logic);
    end component;

    -- Generated Block RAM core, organized as 64 x 16 bit.
    component BRAM_16
        port (
            addr : IN  std_logic_VECTOR(5 downto 0);
            clk  : IN  std_logic;
            din  : IN  std_logic_VECTOR(15 downto 0);
            dout : OUT std_logic_VECTOR(15 downto 0);
            we   : IN  std_logic);
    end component;

    -- Multiplier core with A and B as 8-bit inputs and P as 16-bit output.
    component mult
        port (
            clk : IN  std_logic;
            a   : IN  std_logic_VECTOR(7 downto 0);
            b   : IN  std_logic_VECTOR(7 downto 0);
            p   : OUT std_logic_VECTOR(15 downto 0));
    end component;

    -- ChipScope Pro Integrated Controller
    component icon
        port (
            control0 : out std_logic_vector(35 downto 0));
    end component;

    -- ChipScope Pro Logic Analyzer which connects to the ICON
    component ila
        port (
            control : in std_logic_vector(35 downto 0);
            clk     : in std_logic;
            data    : in std_logic_vector(82 downto 0);
            trig0   : in std_logic_vector(7 downto 0));
    end component;

    -- Milestones of the sequence, expressed as values of `counter`.
    --
    -- COMPUTE_END: the compute/write-back logic is enabled while counter < COMPUTE_END.
    -- It has to stay enabled through counter = 521 so that the last cell, C(7,7), is stored:
    --   counter = 515 (state 3): final sum and address 63 are latched into C_din / C_addr
    --   counter = 520 (state 0): C_we is raised
    --   counter = 521 (state 1): C_we is lowered again; this is also the first clock edge on
    --                            which the C BRAM sees C_we = '1' (assuming the core writes on
    --                            the rising clock edge -- TODO confirm against the core settings)
    -- It has to be disabled before counter = 523, where state 3 would otherwise reload
    -- C_addr / C_din with a sum that does not belong to any cell.
    constant COMPUTE_END        : integer := 522;
    -- Counter value at which C_addr is reset to cell 0 for the read-back sweep.
    constant READBACK_RESET     : integer := 536;
    -- C_addr is incremented while READBACK_SWEEP_LO < counter < READBACK_SWEEP_HI
    -- (both bounds exclusive, i.e. 64 increments).
    constant READBACK_SWEEP_LO  : integer := 540;
    constant READBACK_SWEEP_HI  : integer := 605;
    -- Counter value at which `done` is cleared again
    -- (about 41 microseconds if clk is 50 MHz -- TODO confirm the board clock).
    constant DONE_CLEAR_AT      : integer := 2048;

    -- Sequencing signals
    signal counter     : std_logic_vector(31 downto 0) := (others => '0');
    signal state       : std_logic_vector(2 downto 0)  := "000";
    signal row_counter : std_logic_vector(2 downto 0)  := "000";
    signal col_counter : std_logic_vector(2 downto 0)  := "000";
    signal done        : std_logic := '0';

    -- Signals for BRAMs
    signal A_addr, B_addr, C_addr : std_logic_vector(5 downto 0) := "000000";
    signal A_din, B_din, A_dout, B_dout, a_value, b_value : std_logic_vector(7 downto 0);
    signal A_we, B_we, C_we : std_logic := '0';

    -- Signals for multiplier / accumulator
    signal p_value, C_dout, C_din : std_logic_vector(15 downto 0);
    signal partial_sum            : std_logic_vector(15 downto 0) := (others => '0');

    -- Signals for ChipScope Pro
    signal control0 : std_logic_vector(35 downto 0);
    signal data     : std_logic_vector(82 downto 0);
    signal trig0    : std_logic_vector(7 downto 0);

begin

    ------------------------------------------------------------------------------------------
    -- Component instantiations
    ------------------------------------------------------------------------------------------

    -- BRAM holding the initial values of matrix A
    A_Matrix : BRAM
        port map (
            addr => A_addr,
            clk  => clk,
            din  => A_din,
            dout => A_dout,
            we   => A_we);

    -- BRAM holding the initial values of matrix B
    B_Matrix : BRAM
        port map (
            addr => B_addr,
            clk  => clk,
            din  => B_din,
            dout => B_dout,
            we   => B_we);

    -- BRAM for matrix C, which stores the result of A x B
    C_Matrix : BRAM_16
        port map (
            addr => C_addr,
            clk  => clk,
            din  => C_din,
            dout => C_dout,
            we   => C_we);

    -- Multiplier which takes two 8-bit values and produces a 16-bit result
    Multiplier : mult
        port map (
            clk => clk,
            a   => a_value,
            b   => b_value,
            p   => p_value);

    -- ChipScope Pro Integrated Controller which communicates with the ChipScope Pro analyzer on the PC
    i_icon : icon
        port map (
            control0 => control0);

    -- ChipScope Pro Integrated Logic Analyzer which communicates with the ICON
    i_ila : ila
        port map (
            control => control0,
            clk     => clk,
            data    => data,    -- Data ports
            trig0   => trig0);  -- Triggering ports

    -- Matrices A and B are read-only in this design: their write enables are never asserted
    -- (A_we / B_we keep their '0' initial value), so the data inputs are tied to zero instead
    -- of being left undriven.
    A_din <= (others => '0');
    B_din <= (others => '0');

    ------------------------------------------------------------------------------------------
    -- Cycle counter and element-select state
    ------------------------------------------------------------------------------------------
    -- While the button is pressed both `counter` and `state` advance by one every clock, so
    -- `state` always equals the three low bits of `counter`. Releasing the button clears both.
    process (clk)
    begin
        if rising_edge(clk) then
            if switch = '1' then
                counter <= counter + '1';  -- 32-bit wrap (~86 s if clk is 50 MHz -- TODO confirm)
                state   <= state + 1;      -- wraps to 0 every 8 clock cycles
            else
                counter <= (others => '0');
                state   <= "000";
            end if;
        end if;
    end process;

    ------------------------------------------------------------------------------------------
    -- Row / column counting for matrices A and B
    ------------------------------------------------------------------------------------------
    process (clk)
    begin
        if rising_edge(clk) then
            -- Counting only happens while the full multiplication is not complete (done = '0').
            if done = '0' then
                -- Advance to the next column once all 8 element pairs have been addressed.
                if state = "111" then
                    col_counter <= col_counter + '1';
                end if;

                -- Advance to the next row when the column counter wraps (counting is 0 -> 7).
                if col_counter = "111" and state = "111" then
                    row_counter <= row_counter + '1';
                end if;

                -- Row, column and state all at 7: the last element pair has been addressed.
                if row_counter = "111" and state = "111" and col_counter = "111" then
                    done <= '1';
                end if;
            end if;

            -- Clear `done` again once the counter reaches DONE_CLEAR_AT. This assignment comes
            -- last so that it overrides the one above if both fire on the same edge.
            if counter = DONE_CLEAR_AT then
                done <= '0';
            end if;
        end if;
    end process;

    ------------------------------------------------------------------------------------------
    -- Addressing of the A, B and C BRAMs, accumulation and write-back
    ------------------------------------------------------------------------------------------
    process (clk)
    begin
        if rising_edge(clk) then
            -- Compute phase
            if counter < COMPUTE_END then
                -- Row-major addressing: A is walked along a row, B down a column.
                A_addr  <= row_counter & state;
                B_addr  <= state & col_counter;
                a_value <= A_dout;
                b_value <= B_dout;

                -- One-cycle write enable for the C BRAM, generated when state is 0. By then
                -- C_addr / C_din hold the values latched at the preceding state 3.
                if state = "000" then
                    C_we <= '1';
                else
                    C_we <= '0';
                end if;

                -- Because of the BRAM read, the operand registers and the multiplier, the last
                -- product of a cell arrives after the row/column counters have already moved
                -- on to the next cell. At state 3 the finished sum is therefore stored under
                -- the address of the PREVIOUS cell, i.e. the 6-bit value {row, col} minus one
                -- (the subtraction borrows into the row bits when col is 0), and the
                -- accumulator is cleared for the next cell.
                if state = "011" then
                    C_addr      <= (row_counter & col_counter) - '1';
                    C_din       <= partial_sum + p_value;
                    partial_sum <= (others => '0');
                else
                    partial_sum <= partial_sum + p_value;
                end if;
            end if;

            -- Read-back phase: point the C BRAM at its first cell ...
            if counter = READBACK_RESET then
                C_addr <= "000000";
            end if;

            -- ... and then step through the cells so C_dout can be captured.
            if counter > READBACK_SWEEP_LO and counter < READBACK_SWEEP_HI then
                C_addr <= C_addr + '1';
            end if;
        end if;
    end process;

    ------------------------------------------------------------------------------------------
    -- Outputs to ChipScope Pro for verification
    ------------------------------------------------------------------------------------------
    -- The project directory contains a ChipScope Pro init file with the corresponding bus names
    -- and trigger setup; it has to be loaded after the bitstream has been configured onto the
    -- platform. The trigger is set up on the button so that capturing starts on a button press.
    trig0(0)          <= switch;
    trig0(7 downto 1) <= (others => '0');   -- unused trigger bits, tied off

    data(7 downto 0)   <= A_dout;        -- Output of the BRAM for matrix A
    data(15 downto 8)  <= B_dout;        -- Output of the BRAM for matrix B
    data(31 downto 16) <= p_value;       -- Product delivered by the multiplier
    data(47 downto 32) <= partial_sum;   -- Partial sum of the current row x column
    data(50 downto 48) <= state;         -- Current state of the operation
    data(53 downto 51) <= row_counter;   -- Row being accessed
    data(56 downto 54) <= col_counter;   -- Column being accessed
    data(72 downto 57) <= C_dout;        -- Output of the result matrix C (alternatively: counter(15 downto 0))
    data(73)           <= switch;        -- Current position of the switch
    data(74)           <= done;          -- Indicator that the calculation is finished
    data(80 downto 75) <= C_addr;        -- Address supplied to the C matrix BRAM
    data(82 downto 81) <= (others => '0'); -- unused data bits, tied off

    -- LED indicating the operation time: lit while `done` is '0'
    led <= NOT done;

end Behavioral;