summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjjsuperpower <jjs29356@gmail.com>2022-08-21 22:33:56 -0500
committerjjsuperpower <jjs29356@gmail.com>2022-08-21 22:33:56 -0500
commitcad06d86cd309074fffb5cce9d1f3b79b40f3891 (patch)
tree22f56aeb2cc91cffbe42a64efb305d516a2e428d
parentc27ad0c92e710e14b7d6d50839936ceda51dd017 (diff)
basic alu coded and tested
-rw-r--r--.gitignore4
-rw-r--r--.vscode/.gitignore1
-rw-r--r--doc/ASAP32-ISA.md229
-rw-r--r--hdl/core.py247
4 files changed, 469 insertions, 12 deletions
diff --git a/.gitignore b/.gitignore
index 1642c9d..ec81623 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ __pycache__/
*.vcd
*.cc
*.bak
-*.pytest_cache \ No newline at end of file
+*.pytest_cache
+*.pyc
+*.log \ No newline at end of file
diff --git a/.vscode/.gitignore b/.vscode/.gitignore
new file mode 100644
index 0000000..bf0824e
--- /dev/null
+++ b/.vscode/.gitignore
@@ -0,0 +1 @@
+*.log \ No newline at end of file
diff --git a/doc/ASAP32-ISA.md b/doc/ASAP32-ISA.md
new file mode 100644
index 0000000..b5ed60d
--- /dev/null
+++ b/doc/ASAP32-ISA.md
@@ -0,0 +1,229 @@
+# Vertex KISS 32 - Machine Code Spec
+^ So Vertex is already a name for an FPGA, should we change this?
+I propose a different name:
+
+ ASAP Soc 32 a KISS inspired architecture
+
+ As
+ Simple
+ As
+ Possible
+
+## General Instruction Format
+ X = HEX
+ B = BIN
+
+ d = HEX (not used / don't care)
+
+ MAX INSTRUCTIONS = 256
+ ALL INSTRUCTIONS ARE ATOMIC
+
+ ALL MEMORY ADDRESSES ARE 32-bit, not 8-bit
+ ^ What do you think about this? 32 bits is usually an 'int' in C
+ This would extend the address space, kinda
+
+ Also I have been reading about caching, I think the instruction width needs to be the same as the data memory
+
+### C-Type, Control
+ XX X X XXXX
+ Opcode d RS1 IMM
+
+### I-Type, Immediate
+ XX X X XXXX
+ Opcode RD RS1 IMM
+
+### R-Type, Arithmetic
+ XX X X X XXX
+ Opcode RD RS1 RS2 ddd
+
+### JR-Type, Compare and Jump
+ XX X X X XXX
+ Opcode Jump Condition RS1 RS2 ddd
+
+### JI-Type, Compare and Jump
+ XX X X XXXX
+ Opcode Jump Condition RS1 IMM
+
+## Registers
+ Maximum registers = 16
+ Register width = 32
+ All are R/W except 0X
+
+ 0X Always Zero
+ AX GP-0
+ BX GP-1
+ CX GP-2
+ DX GP-3
+ EX GP-4
+ FX GP-5
+ GX GP-6
+ HX GP-7
+ HI Mult/Div Hi
+ LO Mult/Div Lo
+ FG Processor Flags
+ CR Control register (Writable only in supervisor mode)
+ IP Instruction Pointer
+ SP Stack Pointer
+ JA Jump Address
+
+ ^ I have added this, it is part of my proposal for changing how jumps work
+
+
+### FG Flag Register Bitfield
+ These registers are Read/Write
+ They are automatically written to by the processor
+
+ FG[0] Carry
+ FG[1] Overflow
+ FG[2] Zero
+ FG[3] Sign
+ FG[4-31] RESERVED
+
+### CR Control Register Bitfield
+ These registers are Read/Write in System mode, Read Only in User Mode
+
+ CR[0] Interrupt Enable
+ CR[1] User Mode
+ CR[2] DMA
+ CR[3-31] RESERVED
+
+
+
+## Integer Instructions
+
+### R-Type
+ ADD RD, RS1, RS2 RD = RS1 + RS2
+ SUB RD, RS1, RS2 RD = RS1 - RS2
+ XOR RD, RS1, RS2 RD = RS1 ^ RS2
+ OR RD, RS1, RS2 RD = RS1 | RS2
+ AND RD, RS1, RS2 RD = RS1 & RS2
+ LSL RD, RS1, RS2 RD = RS1 << RS2 (logical)
+ LSR RD, RS1, RS2 RD = RS1 >> RS2 (logical)
+ ASR RD, RS1, RS2 RD = RS1 >> RS2
+ MUL RD, RS1, RS2 HI,LO = RS1 * RS2
+ MULU RD, RS1, RS2 HI,LO = RS1 * RS2 (unsigned)
+ DIV RD, RS1, RS2 HI,LO = RS1 / RS2
+ DIVU RD, RS1, RS2 HI,LO = RS1 / RS2 (unsigned)
+
+ #LDB RD, RS1, RS2 RD = &(RS1 + RS2) Load Byte
+ #STB RD, RS1, RS2 &(RS1 + RS2) = (RD >> 24) Store Byte
+ LDW RD, RS1, RS2 RD = &(RS1 + RS2) Load Word (4 bytes)
+ STW RD, RS1, RS2 &(RS1 + RS2) = RD Store Word (4 bytes)
+
+ # Deprecated?
+
+
+### I-Type
+ ADDI RD, RS, IMM RD = RS + IMM
+ SUBI RD, RS, IMM RD = RS - IMM
+ XORI RD, RS, IMM RD = RS ^ IMM
+ ORI RD, RS, IMM RD = RS | IMM
+ ANDI RD, RS, IMM RD = RS & IMM
+ LSLI RD, RS, IMM RD = RS << IMM (logical)
+ LSRI RD, RS, IMM RD = RS >> IMM (logical)
+ ASRI RD, RS, IMM RD = RS >> IMM
+ MULI dd, RS, IMM HI,LO = RS * IMM
+ MULIU dd, RS, IMM HI,LO = RS * IMM (unsigned)
+ DIVI dd, RS, IMM HI,LO = RS / IMM
+ DIVIU dd, RS, IMM HI,LO = RS / IMM (unsigned)
+
+ #LDBI RD, RS, IMM RD = &(RS + IMM) Load Byte
+ #STBI RD, RS, IMM &(RS + IMM) = (RD >> 24) Store Byte
+ LDWI RD, RS, IMM RD = &(RS + IMM) Load Word (4 bytes)
+ STWI RD, RS, IMM &(RS + IMM) = RD Store Word (4 bytes)
+
+ # Deprecated?
+
+
+### JR Instructions
+ Compare and then jump (IP = JMP)
+
+ JMP 0 if (True)
+ JMP 1 if (RS1 != RS2)
+ JMP 2 if (RS1 == RS2)
+ JMP 3 if (RS1 > RS2) Unsigned
+ JMP 4 if (RS1 >= RS2) Unsigned
+
+ JMP C if (RS1 > RS2) Signed
+ JMP D if (RS1 >= RS2) Signed
+
+### JI Instructions
+ Compare and then jump (IP = JMP)
+
+ JMPI 0 if (True)
+ JMPI 1 if (RS1 != IMM)
+ JMPI 2 if (RS1 == IMM)
+ JMPI 3 if (RS1 > IMM) Unsigned
+ JMPI 4 if (RS1 >= IMM) Unsigned
+
+ JMPI C if (RS1 > IMM) Signed
+ JMPI D if (RS1 >= IMM) Signed
+
+### Jump Aliases
+
+ JEQ
+ JLT
+ JGT
+ JLE
+ JGE
+ JLTU
+ JGTU
+ JLEU
+ JGEU
+
+
+### Control Instructions
+ NOP Do nothing -> opcode = ZERO
+ PUSHR RS SP+=1 ;*SP = RS
+ POPR RS RS = *SP ;SP-=1
+ PUSHA PUSHR AX, BX, CX, DX, EX, FX, GX, HX, HI, LO, FG, CR, IP, SP, JA
+ POPA POP reverse PUSHR, SP not affected
+ PUSHI IMM SP+=1 ;*SP = IMM
+ INVP IMM Invalidate entry in TLB
+ RET POPR IP;
+ CALL PUSHR IP; IP = JMP;
+ INT PUSHA ;IP = IDT[IMM]
+ IRET POPR FLG; POPR SP; POPR IP
+ SIF Set interrupt flag
+ CIF Clear interrupt flag
+
+## Interrupt Descriptor Table
+This will be in a fixed memory location; it will contain pointers to the interrupt functions. Once an interrupt is entered, all interrupts are turned off.
+
+ IDT[0] Divide-by-zero exception
+ IDT[1] Hardware error (NMI)
+ IDT[2] Overflow
+ IDT[3] Invalid Opcode
+ IDT[4] General-protection fault
+ IDT[5] TLB miss
+ IDT[6] Software interrupt (reserved for OS)
+ IDT[7-31] Platform interrupts (PIC, hard drive, keyboard, etc.)
+
+ IDTMSK[0-31] Interrupt mask, when an interrupt is entered the mask bit for the corresponding interrupt will be disabled.
+ The software is responsible for re-enabling the mask bit
+
+You get 32 :)
+Also, I was thinking of making the OS handle TLB misses
+
+
+
+
+
+## Page Directory
+
+The page directory contains 1024 page tables that have 1024 entries.
+
+^ Stupid question: Do we need a page directory? Also I have a very limited size for cache, idt, tlb, etc. Plan on having around 100 Kbits
+
+### Page table layout
+
+ PT[0] Present
+ PT[1] R/W
+ PT[2] User-mode
+ PT[3-4] RESERVED
+ PT[5] Accessed
+ PT[6-7] RESERVED
+ PT[8-31] Physical address of page table (XX * 2^16 + XXXX)
+
+ *This is still WIP but I wanted to get your input on the layout. I also have
+ the jank memory offset that will more than likely change.*
diff --git a/hdl/core.py b/hdl/core.py
index 6ef02fb..a75b92e 100644
--- a/hdl/core.py
+++ b/hdl/core.py
@@ -3,29 +3,254 @@ from amaranth.sim import Simulator, Settle, Delay
from utils import cmd
-class Template(Elaboratable):
+
+# class Reg(Elaboratable):
+# def __init__(self):
+# self.rd_addr = Signal(4)
+# self.rs1_addr = Signal(4)
+# self.rs2_addr = Signal(4)
+
+# self.rd = Signal(32)
+# self.rs1 = Signal(32)
+# self.rs2 = Signal(32)
+
+# self.zx = Signal(32)
+# self.ax = Signal(32)
+# self.bx = Signal(32)
+# self.bx = Signal(32)
+# self.cx = Signal(32)
+# self.dx = Signal(32)
+# self.ex = Signal(32)
+# self.fx = Signal(32)
+# self.gx = Signal(32)
+# self.hx = Signal(32)
+# self.hi = Signal(32)
+# self.lo = Signal(32)
+# self.fg = Signal(32)
+# self.cr = Signal(32)
+# self.ip = Signal(32)
+# self.sp = Signal(32)
+# self.ja = Signal(32)
+
+# self.reg_ar = Array([self.zx, self.ax, self.bx, self.cx, self.dx, self.ex, self.fx, self.gx, self.hx, self.hi, self.lo, self.fg, self.cr, self.ip, self.sp, self.ja])
+
+# # TODO: add support for storing multiplication result
+# self.ports = [self.rd_addr, self.rs1_addr, self.rs2_addr, self.rd, self.rs1, self.rs2, self.ip]
+
+# def elaborate(self, platform=None):
+# m = Module()
+
+# with m.If(self.rd_addr != 0):
+# m.d.sync += self.reg_ar[self.rd_addr].eq(self.rd)
+
+# m.d.comb += self.rs1.eq(self.reg_ar[self.rs1_addr])
+# m.d.comb += self.rs2.eq(self.reg_ar[self.rs2_addr])
+
+# return m
+
+
+# class ASAP32Core(Elaboratable):
+# def __init__(self):
+# self.interupt_msk = Signal(32)
+# self.interupt_addr = Signal(32)
+# self.interupt_en = Signal(1)
+# self.interupt_sig = Signal(1)
+
+# self.jump = Signal(1)
+# self.instruction_addr = Signal(32)
+
+# self.ports = []
+
+# def elaborate(self, platform=None):
+# m = Module()
+
+# m.submodules.reg = reg = Reg()
+
+# # interupt setup
+# m.d.comb += self.interupt_en.eq(reg.cr[0])
+
+# # get instruction address, account for jumps and interupts
+# m.d.sync += self.instruction_addr.eq(Mux(self.interupt_en & self.interupt_sig, self.interupt_addr, Mux(self.jump, reg.ja, reg.ip)))
+
+# # update program counter
+# m.d.sync += reg.ip.eq(self.instruction_addr + 1)
+
+# return m
+
+class ALU(Elaboratable):
def __init__(self):
- ...
+ self.in1 = Signal(32)
+ self.in2 = Signal(32)
+ self.out = Signal(32)
+ self.op = Signal(4)
- self.ports = [...]
+ self.tmp = Signal(33)
+ self.signed_op = Signal(1)
- def elaborate(self, platform):
+ self.carry = Signal(1)
+ self.overflow = Signal(1)
+ self.zero = Signal(1)
+ self.sign = Signal(1)
+
+ self.ports = [self.in1, self.in2, self.out, self.op]
+
+ def elaborate(self, platform=None):
m = Module()
- ...
+ # dummy sync for simulation only
+ if platform is None:
+ dumb = Signal()
+ m.d.sync += dumb.eq(~dumb)
+ with m.Switch(self.op):
+ with m.Case(0b0000):
+ m.d.comb += self.tmp.eq(self.in1 + self.in2)
+ with m.Case(0b0010):
+ m.d.comb += self.tmp.eq(self.in1.as_signed() + self.in2.as_signed())
+ m.d.comb += self.signed_op.eq(1)
+ with m.Case(0b0001):
+ m.d.comb += self.tmp.eq(self.in1 - self.in2)
+ with m.Case(0b0011):
+ m.d.comb += self.tmp.eq(self.in1.as_signed() - self.in2.as_signed())
+ m.d.comb += self.signed_op.eq(1)
+ with m.Case(4):
+ m.d.comb += self.tmp.eq(Cat(self.in1 & self.in2, 0))
+ with m.Case(5):
+ m.d.comb += self.tmp.eq(Cat(self.in1 | self.in2, 0))
+ with m.Case(6):
+ m.d.comb += self.tmp.eq(Cat(self.in1 ^ self.in2, 0))
+ with m.Case(7):
+ m.d.comb += self.tmp.eq(Cat(self.in1 << self.in2[0:5], 0))
+ with m.Case(8):
+ m.d.comb += self.tmp.eq(Cat(self.in1 >> self.in2[0:5], 0))
+ with m.Case(9):
+ m.d.comb += self.tmp.eq(Cat(self.in1.as_signed() >> self.in2[0:5], 0))
+ with m.Case():
+ m.d.comb += self.signed_op.eq(0)
+ m.d.comb += self.tmp.eq(0)
+
+ m.d.comb += self.carry.eq(self.tmp[32])
+ m.d.comb += self.overflow.eq(self.tmp[32] ^ self.tmp[31])
+ m.d.comb += self.sign.eq(self.tmp.as_signed() < 0)
+ m.d.comb += self.zero.eq(self.out == 0)
+
+ m.d.comb += self.out.eq(self.tmp[0:32])
+
return m
+def test_alu(filename="alu.vcd"):
+ dut = ALU()
+ def proc1():
+ def sub_proc(val1, val2):
+ yield dut.in1.eq(val1)
+ yield dut.in2.eq(val2)
+ yield
+ yield Settle()
+
+ # test unsigned addition
+ yield dut.op.eq(0b0000)
+ yield from sub_proc(27, 13)
+ out = yield dut.out
+ assert 27 + 13 == (out), f'ERROR: {out} != {27 + 13}'
+
+ # test signed addition
+ yield dut.op.eq(0b0010)
+ yield from sub_proc(-11, 43)
+ out = yield dut.out.as_signed()
+ assert -11 + 43 == out, f'ERROR: {out} != {-11 + 43}'
+ # test unsigned subtraction
+ yield dut.op.eq(0b0001)
+ yield from sub_proc(25, 13)
+ out = yield dut.out
+ assert 25 - 13 == out, f'ERROR: {out} != {25 - 13}'
+ # test signed subtraction
+ yield dut.op.eq(0b0011)
+ yield from sub_proc(25, -13)
+ out = yield dut.out.as_signed()
+ assert 25 + 13 == out, f'ERROR: {out} != {25 + 13}'
-def test(filename="out.vcd"):
- dut = ...
+ # test unsigned logical and
+ yield dut.op.eq(4)
+ yield from sub_proc(0b10101011, 0b01010101)
+ out = yield dut.out
+ assert 0b00000001 == out, f'ERROR: {out} != {0b00000001}'
- def proc1():
- ...
+ # test unsigned logical or
+ yield dut.op.eq(5)
+ yield from sub_proc(0b10101011, 0b01000101)
+ out = yield dut.out
+ assert 0b11101111 == out, f'ERROR: {out} != {0b11101111}'
+
+ # test logical xor
+ yield dut.op.eq(6)
+ yield from sub_proc(0b10001011, 0b01000101)
+ out = yield dut.out
+ assert 0b11001110 == out, f'ERROR: {out} != {0b11001110}'
+ # test logical shift left
+ yield dut.op.eq(7)
+ yield from sub_proc(0b10001011, 5) # shift left by 5
+ out = yield dut.out
+ assert 0b1000101100000 == out, f'ERROR: {out} != {0b1000101100000}'
+
+ # test logical shift right
+ yield dut.op.eq(8)
+ yield from sub_proc(0b10001011, 5) # shift right by 5
+ out = yield dut.out
+ assert 0b100 == out, f'ERROR: {out} != {0b100}'
+
+ # test aligned shift right
+ yield dut.op.eq(9)
+ yield from sub_proc(0x80001234, 4) # shift right by 4
+ out = yield dut.out
+ assert 0xF8000123 == out, f'ERROR: {out} != {0xF8000123}'
+
+ # test unsigned overflow
+ yield dut.op.eq(0b0000)
+ yield from sub_proc(0xFFFFFFFF, 1) # add 1 to 0xFFFFFFFF
+ out = yield dut.overflow
+ assert out == 1, f'ERROR: {out} != {1}'
+ out = yield dut.carry
+ assert out == 1, f'ERROR: {out} != {1}'
+
+ # test signed overflow
+ yield dut.op.eq(0b0010)
+ yield from sub_proc(0x7FFFFFFF, 1) # add 1 to 0x7FFFFFFF
+ out = yield dut.overflow
+ assert out == 1, f'ERROR: {out} != {1}'
+ out = yield dut.carry
+ assert out == 0, f'ERROR: {out} != {0}'
+
+ # test unsigned underflow
+ yield dut.op.eq(0b0001)
+ yield from sub_proc(0, -1) # subtract 1 from 0
+ out = yield dut.overflow
+ assert out == 1, f'ERROR: {out} != {1}'
+ out = yield dut.carry
+ assert out == 1, f'ERROR: {out} != {1}'
+
+ # test signed underflow
+ yield dut.op.eq(0b0010)
+ yield from sub_proc(0x80000000, -1) # sub 1 from 0x80000000 (most negative number in two's complement)
+ assert out == 1, f'ERROR: {out} != {1}'
+ out = yield dut.carry
+ assert out == 1, f'ERROR: {out} != {1}'
+
+ # test zero
+ yield dut.op.eq(0b0000)
+ yield from sub_proc(0, 0) # add 0 to 0
+ out = yield dut.zero
+ assert out == 1, f'ERROR: {out} != {1}'
+
+ # test zero
+ yield dut.op.eq(0b0000)
+ yield from sub_proc(0, 1) # add 0 to 0
+ out = yield dut.zero
+ assert out == 0, f'ERROR: {out} != {0}'
+
sim = Simulator(dut)
sim.add_clock(1e-6)
@@ -36,5 +261,5 @@ def test(filename="out.vcd"):
if __name__ == '__main__':
- shift_reg = Template(...)
- cmd(shift_reg, test) \ No newline at end of file
+ hdl = ALU()
+ cmd(hdl, test_alu)