From 681036ddc3891904f13c84197cd90e2472cb3e1c Mon Sep 17 00:00:00 2001 From: Ekaitz Zarraga Date: Fri, 23 Jul 2021 20:49:14 +0200 Subject: Change folder name --- pysc-v/Frontend/__init__.py | 0 pysc-v/Frontend/lexer.py | 269 ------------------- pysc-v/Frontend/logger.py | 10 - pysc-v/Frontend/parser.py | 1 - pysc-v/Frontend/reader.py | 51 ---- pysc-v/InstructionSets/RV32C.py | 29 -- pysc-v/InstructionSets/RV32D.py | 0 pysc-v/InstructionSets/RV32F.py | 0 pysc-v/InstructionSets/RV32I.py | 461 -------------------------------- pysc-v/InstructionSets/RV64I.py | 63 ----- pysc-v/InstructionSets/__init__.py | 0 pysc-v/InstructionSets/instructions.py | 36 --- pysc-v/main.py | 5 - pysc-v/memory.py | 112 -------- pysc-v/registers/RV32F.py | 45 ---- pysc-v/registers/RV32I.py | 117 --------- pysc-v/registers/__init__.py | 0 pyscv/Frontend/__init__.py | 0 pyscv/Frontend/lexer.py | 292 +++++++++++++++++++++ pyscv/Frontend/logger.py | 10 + pyscv/Frontend/parser.py | 1 + pyscv/Frontend/reader.py | 51 ++++ pyscv/InstructionSets/RV32C.py | 29 ++ pyscv/InstructionSets/RV32D.py | 0 pyscv/InstructionSets/RV32F.py | 0 pyscv/InstructionSets/RV32I.py | 466 +++++++++++++++++++++++++++++++++ pyscv/InstructionSets/RV64I.py | 63 +++++ pyscv/InstructionSets/__init__.py | 0 pyscv/InstructionSets/instructions.py | 36 +++ pyscv/main.py | 8 + pyscv/memory.py | 112 ++++++++ pyscv/registers/RV32F.py | 45 ++++ pyscv/registers/RV32I.py | 117 +++++++++ pyscv/registers/__init__.py | 0 34 files changed, 1230 insertions(+), 1199 deletions(-) delete mode 100644 pysc-v/Frontend/__init__.py delete mode 100644 pysc-v/Frontend/lexer.py delete mode 100644 pysc-v/Frontend/logger.py delete mode 100644 pysc-v/Frontend/parser.py delete mode 100644 pysc-v/Frontend/reader.py delete mode 100644 pysc-v/InstructionSets/RV32C.py delete mode 100644 pysc-v/InstructionSets/RV32D.py delete mode 100644 pysc-v/InstructionSets/RV32F.py delete mode 100644 pysc-v/InstructionSets/RV32I.py delete mode 100644 pysc-v/InstructionSets/RV64I.py delete mode 100644 pysc-v/InstructionSets/__init__.py delete mode 100644 pysc-v/InstructionSets/instructions.py delete mode 100644 pysc-v/main.py delete mode 100644 pysc-v/memory.py delete mode 100644 pysc-v/registers/RV32F.py delete mode 100644 pysc-v/registers/RV32I.py delete mode 100644 pysc-v/registers/__init__.py create mode 100644 pyscv/Frontend/__init__.py create mode 100644 pyscv/Frontend/lexer.py create mode 100644 pyscv/Frontend/logger.py create mode 100644 pyscv/Frontend/parser.py create mode 100644 pyscv/Frontend/reader.py create mode 100644 pyscv/InstructionSets/RV32C.py create mode 100644 pyscv/InstructionSets/RV32D.py create mode 100644 pyscv/InstructionSets/RV32F.py create mode 100644 pyscv/InstructionSets/RV32I.py create mode 100644 pyscv/InstructionSets/RV64I.py create mode 100644 pyscv/InstructionSets/__init__.py create mode 100644 pyscv/InstructionSets/instructions.py create mode 100644 pyscv/main.py create mode 100644 pyscv/memory.py create mode 100644 pyscv/registers/RV32F.py create mode 100644 pyscv/registers/RV32I.py create mode 100644 pyscv/registers/__init__.py diff --git a/pysc-v/Frontend/__init__.py b/pysc-v/Frontend/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pysc-v/Frontend/lexer.py b/pysc-v/Frontend/lexer.py deleted file mode 100644 index c4a9a7d..0000000 --- a/pysc-v/Frontend/lexer.py +++ /dev/null @@ -1,269 +0,0 @@ -# TODO Logging is interesting for debugging purposes, decide what to do with it -from logger import newlogger -logger = newlogger(__name__) -#import logging -#logger.setLevel(logging.WARN) - - -###### - -from enum import Enum -from reader import Reader - -binChars = set("01") -octChars = set("01234567") -decChars = set("0123456789") -hexChars = set("0123456789ABCDEFabcdef") - -class TokenType(Enum): - """ - These are the possible tokens that the lexer knows, they are converted to - their most accurate representation in python. - """ - identifier = 0 # Represented as strings - label = 1 # Represented as strings or integer if they are numeric - instruction = 2 # Represented as strings - directive = 3 # Represented as strings - integer = 4 # Represented as int - character = 5 # Represented as strings of length 1 - string = 6 # Represented as strings - float = 7 # Represented as ?? - # (we need correct conversions to the binary value) - # TODO - end = 8 - argsep = 9 - openparens = 10 - closeparens = 11 - -class Lexer: - def __init__(self, reader): - self.reader = reader - self.tokenstart = 0 - - def __iter__(self): - return self - - def __next__(self): - if self.reader.char is None: - self.reader.advance() - while True: - try: - # Instruction end - if self.reader.char == "\n" or self.reader.char == ";": - self.reader.advance() - return (TokenType.end, None) - - # Argument separator - elif self.reader.char == ",": - self.reader.advance() - return (TokenType.argsep, None) - - # String - elif self.reader.char == '"': - return self.string() - - # Character - elif self.reader.char == "'": - return self.character() - - # Comment - elif self.reader.char == "#": - self.comment() - - # Parenthesis - # Load a register as an address + an offset - elif self.reader.char == '(': - self.reader.advance() - return (TokenType.openparens, None) - elif self.reader.char == ')': - self.reader.advance() - return (TokenType.closeparens, None) - - # Starts with digit: - # - Numbers (any kind) - # - Numeric Labels - # - Numeric Label references - # - Load register as address: ld a1, 4(a0) - # ^^^^^ - # offset + reg - elif self.reader.char.isdigit() or self.reader.char == "-": - return self.number() - - # Identifiers - elif self.reader.char.isalpha(): - return self.identifier() - - elif self.reader.char == "": - break # FILE END - - else: - # TODO: Remove this, it's just for testing - self.reader.advance() - except Exception as e: - # Handle exceptions - # raise StopIteration - raise e - raise StopIteration - - def string (self): - logger.info("Found string") - self.reader.advance() # Ignore opening quotes - - string = "" - escaped = False # Set if previous character was a backslash - - while self.reader.char != '"' or escaped: - if self.reader.char == "": - # TODO: Check how to do this - raise ValueError("Error: string not closed, found EOF") - if self.reader.char == "\n": - # TODO: Consider the string as closed and continue but report - # the error? - raise ValueError("Error: string not closed, found newline") - - if escaped: - string += self.escaped_char(self.reader.char) - logger.debug("Escape sequence processed %s", - string[-1].__repr__()) - escaped = False - continue - if self.reader.char == "\\": - escaped = True - self.reader.advance() - continue - escaped = False - string += self.reader.char - self.reader.advance() - logger.info("Lexed string %s", string.__repr__()) - self.reader.advance() # Discard closing " - return (TokenType.string, string) - - def character (self): - logger.info("Found character") - self.reader.advance() # Ignore the opening quote - character = self.reader.char - if character == "": - # TODO: Check how to do this - raise ValueError("Error: found EOF") - if not character.isprintable() : - raise ValueError("Error: Non printable character") - - if character == "\\": - self.reader.advance() - character = self.escaped_char(self.reader.char) - logger.debug("Escape sequence processed: %s", character.__repr__()) - - # Make sure it's correctly closed - self.reader.advance() - if self.reader.char != "'": - raise ValueError("Parse error: expected `'`, found " + self.reader.char) - self.reader.advance() # Discard closing ' - - logger.info("Lexed char %s", character.__repr__()) - return (TokenType.character, character) - - def comment(self): - while self.reader != "\n": - self.reader.advance() - - def escaped_char(self, ch): - if ch == '"': - return '"' - elif ch == 'n': - return '\n' - elif ch == 't': - return '\t' - elif ch == '\\': - return '\\' - else: - # TODO: implement more escape sequences - return "" - - - def number(self): - """ - Process anything that starts with a number. Could be: - - An actual number in hex, octal, binary or decimal - - Numeric labels or numeric label references - - floating point (not implemented yet) - """ - numbstr = self.reader.char - self.reader.advance() - - logger.info("Found number: %s", numbstr) - - # Hex, Bin, Oct and the 0 - if numbstr == "0": - reprid = self.reader.peek() - if reprid == "x": - self.reader.advance() - return self.hex() - elif reprid == "b": - self.reader.advance() - return self.bin() - elif reprid.isdigit(): - return self.oct() - else: - return (TokenType.integer, 0) - - # TODO: floating point numbers - - # Decimal number - # consume a decimal number - while self.reader.char in decChars: - numbstr += self.reader.char - self.reader.advance() - - # Numeric label reference - if self.reader.char in set("bf"): - numbstr += self.reader.char - self.reader.advance() - return (TokenType.identifier, numbstr) - # TODO: Return identifier or label? - # It's a label-ref not a label! - - # Numeric label - if self.reader.char in ":": - self.reader.advance() - return (TokenType.label, numbstr) - - return (TokenType.integer, int(numbstr)) - - def hex(self): - # TODO: Handle possible errors - numbstr = "" - while self.reader.char in hexChars: - numbstr += self.reader.char - self.reader.advance() - return (TokenType.integer, int(numbstr, 16)) - - def oct(self): - # TODO: Handle possible errors - numbstr = "" - while self.reader.char in octChars: - numbstr += self.reader.char - self.reader.advance() - return (TokenType.integer, int(numbstr, 8)) - - def bin(self): - # TODO: Handle possible errors - numbstr = "" - while self.reader.char in binChars: - numbstr += self.reader.char - self.reader.advance() - return (TokenType.integer, int(numbstr, 2)) - - - def identifier(self): - s = "" - while self.reader.char.isalnum() or self.reader.char == "_": - s += self.reader.char - self.reader.advance() - return (TokenType.identifier, s) - -if __name__ == "__main__": - import sys - with Reader(sys.argv[1]) as src: - lexer = Lexer(src) - for token in lexer: - print(token) diff --git a/pysc-v/Frontend/logger.py b/pysc-v/Frontend/logger.py deleted file mode 100644 index a0f76e1..0000000 --- a/pysc-v/Frontend/logger.py +++ /dev/null @@ -1,10 +0,0 @@ -import logging -import sys - -logging.basicConfig() - - -def newlogger(name): - logger = logging.getLogger(name) - logger.setLevel(level=logging.DEBUG) - return logger diff --git a/pysc-v/Frontend/parser.py b/pysc-v/Frontend/parser.py deleted file mode 100644 index b5acfbb..0000000 --- a/pysc-v/Frontend/parser.py +++ /dev/null @@ -1 +0,0 @@ -from InstructionSets import RV32I, RV32C, RV32F, RV32D diff --git a/pysc-v/Frontend/reader.py b/pysc-v/Frontend/reader.py deleted file mode 100644 index 7694a94..0000000 --- a/pysc-v/Frontend/reader.py +++ /dev/null @@ -1,51 +0,0 @@ -# TODO Logging is interesting for debugging purposes, decide what to do with it -from logger import newlogger -logger = newlogger(__name__) -#import logging -#logger.setLevel(logging.WARN) - -class Reader: - def __init__(self, filename): - self.filename = filename - self._file = None - self.lineno = 0 - self.charno = 0 - self.char = None - - def open(self): - self._file = open(self.filename, "r") - return - def close(self): - self._file.close() - return - - def __enter__(self): - self.open() - return self - - def __exit__(self, type, value, tb): - self.close() - - def advance(self): - self.charno += 1 - if self.char == "\n": - self.lineno += 1 - self.charno = 0 - self.char = self._file.read(1) - - logger.debug("Read: %s" % self.char.__repr__()) - return - - def peek(self): - pos = self._file.tell() - ch = self._file.read(1) - self._file.seek(pos) - return ch - - -if __name__ == "__main__": - with Reader(__file__ ) as src: - print(src.peek()) - while src.peek() != "": - src.advance() - print(src.char) diff --git a/pysc-v/InstructionSets/RV32C.py b/pysc-v/InstructionSets/RV32C.py deleted file mode 100644 index e740ac4..0000000 --- a/pysc-v/InstructionSets/RV32C.py +++ /dev/null @@ -1,29 +0,0 @@ -from instructions import Instruction, InstructionSet - -class Compressed(Instruction): - size = 2 - - -class CR(Compressed): - pass - -class CI(Compressed): - pass - -class CSS(Compressed): - pass - -class CIW(Compressed): - pass - -class CJ(Compressed): - pass - -class CB(Compressed): - pass - -class CL(Compressed): - pass - -class CS(Compressed): - pass diff --git a/pysc-v/InstructionSets/RV32D.py b/pysc-v/InstructionSets/RV32D.py deleted file mode 100644 index e69de29..0000000 diff --git a/pysc-v/InstructionSets/RV32F.py b/pysc-v/InstructionSets/RV32F.py deleted file mode 100644 index e69de29..0000000 diff --git a/pysc-v/InstructionSets/RV32I.py b/pysc-v/InstructionSets/RV32I.py deleted file mode 100644 index f2929f3..0000000 --- a/pysc-v/InstructionSets/RV32I.py +++ /dev/null @@ -1,461 +0,0 @@ -from .instructions import Instruction, InstructionSet -from ctypes import c_uint32 - -RV32I = InstructionSet() - -class R(Instruction): - funct3 = None - funct7 = None - opcode = None - def __init__(self, rd, rs1, rs2): - self.rd = rd - self.rs1 = rs1 - self.rs2 = rs2 - - def compile(self): - # TODO: ensure sizes and convert register names to number... - return c_uint32( - (self.funct7 << 25) +\ - (self.rs2 << 20) +\ - (self.rs1 << 15) +\ - (self.funct3 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - -class I(Instruction): - funct3 = None - opcode = None - - def __init__(self, rd, rs, imm): - self.rd = rd - self.rs = rs - self.imm = imm - - def compile(self): - return c_uint32( - (self.imm << 20) +\ - (self.rs << 15) +\ - (self.funct3 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - -class S(Instruction): - funct3 = None - opcode = None - - def __init__(self, rs1, rs2, imm): - self.rs1 = rs1 - self.rs2 = rs2 - self.imm = imm - - def compile(self): - imm_0_4 = self.imm & 0x1F - imm_5_11 = (self.imm & 0xFE0)>>5 - return c_uint32( - (imm_5_11 << 25) +\ - (self.rs1 << 20) +\ - (self.rs2 << 15) +\ - (self.funct3 << 12) +\ - (imm_0_4 << 7) +\ - self.opcode - ) - -class B(Instruction): - funct3 = None - opcode = None - - def __init__(self, rs1, rs2, imm): - self.rs1 = rs1 - self.rs2 = rs2 - self.imm = imm - - def compile(self): - # NOTE: The lowest bit of the imm is always 0 because instructions - # are at least 16 bits wide, so it's not used, that's why this - # instruction looks that weird - imm_12 = (self.imm & 0b1000000000000)>>12 - imm_11 = (self.imm & 0b0100000000000)>>11 - imm_5_10 = (self.imm & 0b0011111100000)>>5 - imm_1_4 = (self.imm & 0b0000000011110)>>1 - return c_uint32( - (imm_12 << 31) +\ - (imm_5_10 << 25) +\ - (self.rs2 << 20) +\ - (self.rs1 << 15) +\ - (self.funct3 << 12) +\ - (imm_1_4 << 8) +\ - (imm_11 << 7) +\ - self.opcode - ) - - -class U(Instruction): - opcode = None - def __init__(self, rd, imm): - self.rd = rd - self.imm = imm - - def compile(self): - # NOTE: U Type is for AUIPC and LUI that only use the high part of the - # immediate - imm_12_32 = (self.imm & 0xFFFFF000)>>12 - return c_uint32( - (imm_12_32 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - -class J(Instruction): - opcode = None - - def __init__(self, rd, imm): - self.rd = rd - self.imm = imm - - def compile(self): - # NOTE: Jumps are also weird - imm_20 = (self.imm & 0x100000)>>20 - imm_12_19 = (self.imm & 0x0FF000)>>12 - imm_11 = (self.imm & 0x000800)>>11 - imm_1_10 = (self.imm & 0x0007FE)>>1 - return c_uint32( - (imm_20 << 31) +\ - (imm_1_10 << 21) +\ - (imm_11 << 20) +\ - (imm_12_19 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - - - - -@RV32I.instruction -class lui(U): - name = "lui" - opcode = 0b0110111 - -@RV32I.instruction -class auipc(U): - name = "auipc" - opcode = 0b0010111 - -@RV32I.instruction -class jal(J): - name = "jal" - opcode = 0b1101111 - - def execute(self, pc): - # TODO - # - Save current pc in rd - # - Make pc from `imm` - # - Return new pc - return pc - -@RV32I.instruction -class jalr(I): - name = "jalr" - opcode = 0b1100111 - funct3 = 0b000 - -@RV32I.instruction -class beq(B): - name = "beq" - opcode = 0b1100011 - funct3 = 0b000 - -@RV32I.instruction -class bne(B): - name = "bne" - opcode = 0b1100011 - funct3 = 0b001 - -@RV32I.instruction -class blt(B): - name = "blt" - opcode = 0b1100011 - funct3 = 0b100 - -@RV32I.instruction -class bge(B): - name = "bge" - opcode = 0b1100011 - funct3 = 0b101 - -@RV32I.instruction -class bltu(B): - name = "bltu" - opcode = 0b1100011 - funct3 = 0b110 - -@RV32I.instruction -class bgeu(B): - name = "bgeu" - opcode = 0b1100011 - funct3 = 0b111 - -@RV32I.instruction -class lb(I): - name = "lb" - opcode = 0b0000011 - funct3 = 0b000 - -@RV32I.instruction -class lh(I): - name = "lh" - opcode = 0b0000011 - funct3 = 0b001 - -@RV32I.instruction -class lw(I): - name = "lw" - opcode = 0b0000011 - funct3 = 0b010 - -@RV32I.instruction -class lbu(I): - name = "lbu" - opcode = 0b0000011 - funct3 = 0b100 - -@RV32I.instruction -class lhu(I): - name = "lhu" - opcode = 0b0000011 - funct3 = 0b101 - - -@RV32I.instruction -class sb(S): - name = "sb" - opcode = 0b0100011 - funct3 = 0b000 - -@RV32I.instruction -class sh(S): - name = "sh" - opcode = 0b0100011 - funct3 = 0b001 - -@RV32I.instruction -class sw(S): - name = "sw" - opcode = 0b0100011 - funct3 = 0b010 - -@RV32I.instruction -class addi(I): - name = "addi" - opcode = 0b0010011 - funct3 = 0b000 - - def execute(self, pc): - # TODO - return pc + self.size - -@RV32I.instruction -class slti(I): - name = "slti" - opcode = 0b0010011 - funct3 = 0b010 - -@RV32I.instruction -class sltiu(I): - name = "sltiu" - opcode = 0b0010011 - funct3 = 0b011 - -@RV32I.instruction -class xori(I): - name = "xori" - opcode = 0b0010011 - funct3 = 0b100 - -@RV32I.instruction -class ori(I): - name = "ori" - opcode = 0b0010011 - funct3 = 0b110 - -@RV32I.instruction -class andi(I): - name = "andi" - opcode = 0b0010011 - funct3 = 0b111 - - -class ShiftImm(I): - # NOTE: This is an special type used for shifting operations because they - # have 7 bits left after the maximum shift (5bits -> 32 rotations) - # they can apply. - # In RV64I they can indicate rotation with 1 bit more (64 rotations) so - # they use a funct6 instead. - funct7 = None - funct3 = None - opcode = None - - def __init__(self, rd, rs, imm): - self.rd = rd - self.rs = rs - self.imm = imm - - def compile(self): - return c_uint32( - (self.funct7 << 25) +\ - (self.imm << 20) +\ - (self.rs << 15) +\ - (self.funct3 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - -@RV32I.instruction -class slli(ShiftImm): - name = "slli" - opcode = 0b0010011 - funct3 = 0b001 - funct7 = 0b0000000 - -@RV32I.instruction -class srli(ShiftImm): - name = "srli" - opcode = 0b0010011 - funct3 = 0b101 - funct7 = 0b0000000 - -@RV32I.instruction -class srai(ShiftImm): - name = "srai" - opcode = 0b0010011 - funct3 = 0b101 - funct7 = 0b0100000 - - -@RV32I.instruction -class add(R): - name = "add" - opcode = 0b0110011 - funct3 = 0b000 - funct7 = 0b0000000 - - def execute(self, pc): - # TODO - return pc + self.size - -@RV32I.instruction -class sub(R): - name = "sub" - opcode = 0b0110011 - funct3 = 0b000 - funct7 = 0b0100000 - -@RV32I.instruction -class sll(R): - name = "sll" - opcode = 0b0110011 - funct3 = 0b001 - funct7 = 0b0000000 - -@RV32I.instruction -class slt(R): - name = "slt" - opcode = 0b0110011 - funct3 = 0b010 - funct7 = 0b0000000 - -@RV32I.instruction -class sltu(R): - name = "sltu" - opcode = 0b0110011 - funct3 = 0b011 - funct7 = 0b0000000 - -@RV32I.instruction -class xor(R): - name = "xor" - opcode = 0b0110011 - funct3 = 0b100 - funct7 = 0b0000000 - -@RV32I.instruction -class srl(R): - name = "srl" - opcode = 0b0110011 - funct3 = 0b101 - funct7 = 0b0000000 - -@RV32I.instruction -class sra(R): - name = "sra" - opcode = 0b0110011 - funct3 = 0b101 - funct7 = 0b0100000 - -@RV32I.instruction -class _or(R): - name = "or" - opcode = 0b0110011 - funct3 = 0b110 - funct7 = 0b0000000 - -@RV32I.instruction -class _and(R): - name = "and" - opcode = 0b0110011 - funct3 = 0b111 - funct7 = 0b0000000 - -@RV32I.instruction -class ecall(I): - name = "ecall" - opcode = 0b1110011 - funct3 = 0b000 - - def __init__(self): - # NOTE: ecall is a I-type instruction but doesn't get any arg and sets - # every field to 0 - self.rd = 0b00000 - self.rs = 0b00000 - self.imm = 0b000000000000 - - -@RV32I.instruction -class ebreak(I): - name = "ebreak" - opcode = 0b1110011 - funct3 = 0b000 - - def __init__(self): - # NOTE: ebreak is a I-type instruction but doesn't get any arg and pre- - # -sets every field to a fixed value - self.rd = 0b00000 - self.rs = 0b00000 - self.imm = 0b000000000001 - - - - - - - - - - - - - - - - -@RV32I.pseudoinstruction -class j(J): - name = "j" - def __new__(cls, imm): - return jal("x0", imm) - - -if __name__ == "__main__": - print(RV32I) - print(RV32I.instructions) diff --git a/pysc-v/InstructionSets/RV64I.py b/pysc-v/InstructionSets/RV64I.py deleted file mode 100644 index 72b8bcb..0000000 --- a/pysc-v/InstructionSets/RV64I.py +++ /dev/null @@ -1,63 +0,0 @@ -from .instructions import Instruction, InstructionSet -from ctypes import c_uint32 -from .RV32I import * - -RV64I = InstructionSet() - -class ShiftImm64(ShiftImm): - # NOTE: This is an special type used for shifting operations because they - # have 7 bits left after the maximum shift (5bits -> 32 rotations) - # they can apply. - # In RV64I they can indicate rotation with 1 bit more (64 rotations) so - # they use a funct6 instead. - funct6 = None - funct3 = None - opcode = None - - def __init__(self, rd, rs, imm): - self.rd = rd - self.rs = rs - self.imm = imm - - def compile(self): - return c_uint32( - (self.funct6 << 26) +\ - (self.imm << 20) +\ - (self.rs << 15) +\ - (self.funct3 << 12) +\ - (self.rd << 7) +\ - self.opcode - ) - -@RV64I.instruction -class slli(ShiftImm64): - name = "slli" - opcode = 0b0010011 - funct3 = 0b001 - funct6 = 0b000000 - -@RV64I.instruction -class srli(ShiftImm64): - name = "srli" - opcode = 0b0010011 - funct3 = 0b101 - funct6 = 0b000000 - -@RV64I.instruction -class srai(ShiftImm64): - name = "srai" - opcode = 0b0010011 - funct3 = 0b101 - funct6 = 0b010000 - -@RV64I.instruction -class sd(S): - name = "sd" - opcode = 0b0100011 - funct3 = 0b011 - -@RV64I.instruction -class ld(I): - name = "ld" - opcode = 0b0000011 - funct3 = 0b011 diff --git a/pysc-v/InstructionSets/__init__.py b/pysc-v/InstructionSets/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pysc-v/InstructionSets/instructions.py b/pysc-v/InstructionSets/instructions.py deleted file mode 100644 index 00a3146..0000000 --- a/pysc-v/InstructionSets/instructions.py +++ /dev/null @@ -1,36 +0,0 @@ -from ctypes import c_uint32 - - -class Instruction: - size = 4 # Instruction size in bytes - - def __init__(self): - pass - - def compile(self): - # return the binstream of the instruction in a c_uint32 - pass - - def execute(self, pc): - # executes the instruction and returns the next program counter - return - -class InstructionSet: - - def __init__(self, init=None): - self.instructions = dict() - - def instruction(self, ins): - if ins.name not in self.instructions: - self.instructions[ins.name] = ins - return ins - - # NOTE: We don't need to treat pseudoinstructions in an special way yet, - # but we separate the decorator for clarity - pseudoinstruction = instruction - - -if __name__ == "__main__": - # TODO This is the interface i'd love to have - addins = add("x5","x2","zero") - j("labelName") diff --git a/pysc-v/main.py b/pysc-v/main.py deleted file mode 100644 index 2d6f6a5..0000000 --- a/pysc-v/main.py +++ /dev/null @@ -1,5 +0,0 @@ -# Use a generator for the execution flow -# -> PC has to be a global variable, updated by each instruction to the next val -# So user can set the PC by hand and call next(run) and make the code jump! - - diff --git a/pysc-v/memory.py b/pysc-v/memory.py deleted file mode 100644 index 84bf0bd..0000000 --- a/pysc-v/memory.py +++ /dev/null @@ -1,112 +0,0 @@ -from ctypes import c_uint8, c_uint16, c_uint32 - -class Addressable: - def __init__(self, start=0, end=0): - self.start = start - self.end = end - def addr_to_pos(self, addr): - if not (self.start <= addr < self.end): - raise KeyError("Address out of space") - return addr - self.start - -def merge_bytes (byte_iter): - val = 0 - for i,v in enumerate(byte_ouput): - val += v.value << 8 * i - return val -def split_bytes (val, byte_count): - return tuple( c_uint8(val >> 8 * i) for i in range(byte_count) ) - -class Memory32 (Addressable): - """ - This is a raw 32-bit word memory addressable at a byte level. - Internally it is defined as c_uint8 list. - Special functions are needed to access halfs (c_uint16) and words - (c_uint32). - """ - - def __init__(self, start=0, end=0, bigEndian=False): - super().__init__(start, end) - self.bigEndian = bigEndian - self.lastChange = None - # Pre-allocate or allocate or allocate per write? - self.data = [None] * (end - start) - - def get_byte(self, addr): - return self.data[ self.addr_to_pos(addr) ] - def set_byte(self, addr, val): - if not isinstance(val, c_uint8): - if val > 0xFF: - raise ValueError("Value is larger than a byte") - self.data[ self.addr_to_pos(addr) ] = val if isinstance(val, c_uint8) else c_uint8(val) - self.lastChange = range(addr, addr+1) - - - def get_half(self,addr): - byte_output = (get_byte(addr), get_byte(addr+1)) - if self.bigEndian: - byte_output = reversed(byte_output) - return c_uint16( merge_bytes(byte_output) ) - - def set_half(self, addr, val): - if not isinstance(val, c_uint16): - if val > 0xFFFF: - raise ValueError("Value is larger than a half") - bytes = split_bytes(val, 2) - if self.bigEndian: - bytes = reversed(bytes) - for i,v in enumerate(bytes): - self.set_byte(addr+i, v) - self.lastChange = range(addr,addr+2) - - def get_word(self, pos): - byte_output = tuple(get_byte(addr+i) for i in range(4)) - if self.bigEndian: - byte_output = reversed(byte_output) - return c_uint32( merge_bytes(byte_output) ) - - def set_word(self, addr, val): - if not isinstance(val, c_uint32): - if val > 0xFFFFFFFF: - raise ValueError("Value is larger than a word") - bytes = split_bytes(val, 4) - if self.bigEndian: - bytes = reversed(bytes) - for i,v in enumerate(bytes): - self.set_byte(addr+i, v) - self.lastChange = range(addr,addr+4) - - def __str__(self): - out = " " - out += "-" * 50 - out += "\n" - for i,d in enumerate(self.data): - if d is not None: - addr = i+self.start - out += "->" if addr in self.lastChange else " " - out += f"| {addr:#20x} | {d.value:#5} | {d.value:#04x} | {d.value:#010b} |" - out += "\n" - out += " " - out += "-" * 50 - out += "\n" - return out - - -class Memory32RO (Memory32): - def set_byte(self, addr, val): - raise NotImplementedError("Trying to write in a read-only memory") - def set_half(self, addr, val): - raise NotImplementedError("Trying to write in a read-only memory") - def set_word(self, addr, val): - raise NotImplementedError("Trying to write in a read-only memory") - -class CodeMemory32 (Memory32RO): - def __init__(self, start=0, end=0): - super().__init__(writable=False,start=start,end=end) - - -if __name__ == "__main__": - m = Memory32(start=100,end=200) - print(m.start, m.end) - m.set_byte(100, 246) - print(m) diff --git a/pysc-v/registers/RV32F.py b/pysc-v/registers/RV32F.py deleted file mode 100644 index 8e3fd28..0000000 --- a/pysc-v/registers/RV32F.py +++ /dev/null @@ -1,45 +0,0 @@ -from RV32I import RegistersRV32I - -class RegistersRV32F(RegistersRV32I): - - def __init__(self): - self.names = {} - self.data = [] - - self.setter = [] - self.getter = [] - - self.lastChange = None - - self.addRegister(("f0","ft0")) - self.addRegister(("f1","ft1")) - self.addRegister(("f2","ft2")) - self.addRegister(("f3","ft3")) - self.addRegister(("f4","ft4")) - self.addRegister(("f5","ft5")) - self.addRegister(("f6","ft6")) - self.addRegister(("f7","ft7")) - self.addRegister(("f8","fs0")) - self.addRegister(("f9","fs1")) - self.addRegister(("f10","a0")) - self.addRegister(("f11","a1")) - self.addRegister(("f12","a2")) - self.addRegister(("f13","a3")) - self.addRegister(("f14","a4")) - self.addRegister(("f15","a5")) - self.addRegister(("f16","a6")) - self.addRegister(("f17","a7")) - self.addRegister(("f18","fs2")) - self.addRegister(("f19","fs3")) - self.addRegister(("f20","fs4")) - self.addRegister(("f21","fs5")) - self.addRegister(("f22","fs6")) - self.addRegister(("f23","fs7")) - self.addRegister(("f24","fs8")) - self.addRegister(("f25","fs9")) - self.addRegister(("f26","fs10")) - self.addRegister(("f27","fs11")) - self.addRegister(("f28","ft8")) - self.addRegister(("f29","ft9")) - self.addRegister(("f30","ft10")) - self.addRegister(("f31","ft11")) diff --git a/pysc-v/registers/RV32I.py b/pysc-v/registers/RV32I.py deleted file mode 100644 index a723064..0000000 --- a/pysc-v/registers/RV32I.py +++ /dev/null @@ -1,117 +0,0 @@ -# Register block of RV32I -# Set and get using [] with the name of the register (lowercase) or the -# position -from ctypes import c_uint32 - -def defaultGetter(self, pos): - return self.data[pos] - -def defaultSetter(self, pos, val): - self.data[pos] = val - -def zeroSetter(self, pos, val): - # hardwired to zero - self.data[pos] = c_uint32(0) - - -class RegistersRV32I: - - def __init__(self): - self.names = {} - self.data = [] - - self.setter = [] - self.getter = [] - - self.lastChange = None - - self.addRegister(("x0","zero"), setter = zeroSetter) - self.addRegister(("x1","ra")) - self.addRegister(("x2","sp", "v0")) - self.addRegister(("x3","gp", "v1")) - self.addRegister(("x4","tp")) - self.addRegister(("x5","t0")) - self.addRegister(("x6","t1")) - self.addRegister(("x7","t2")) - self.addRegister(("x8","s0","fp")) - self.addRegister(("x9","s1")) - self.addRegister(("x10","a0")) - self.addRegister(("x11","a1")) - self.addRegister(("x12","a2")) - self.addRegister(("x13","a3")) - self.addRegister(("x14","a4")) - self.addRegister(("x15","a5")) - self.addRegister(("x16","a6")) - self.addRegister(("x17","a7")) - self.addRegister(("x18","s2")) - self.addRegister(("x19","s3")) - self.addRegister(("x20","s4")) - self.addRegister(("x21","s5")) - self.addRegister(("x22","s6")) - self.addRegister(("x23","s7")) - self.addRegister(("x24","s8")) - self.addRegister(("x25","s9")) - self.addRegister(("x26","s10")) - self.addRegister(("x27","s11")) - self.addRegister(("x28","t3")) - self.addRegister(("x29","t4")) - self.addRegister(("x30","t5")) - self.addRegister(("x31","t6")) - - - def addRegister(self, names, getter=defaultGetter, setter=defaultSetter): - for name in names: - if name in self.names: - raise KeyError("Register name already in use") - - currentpos = len(self.data) - for name in names: - self.names[name] = currentpos - self.data.append(c_uint32(0)) - - self.getter.append(getter) - self.setter.append(setter) - - def getPos(self, el): - if isinstance(el, str): - pos = self.names[el] - elif isinstance(el, int): - pos = el - else: - raise ValueError("Wrong type of register id. Must be str or int") - return pos - - def __getitem__(self, el): - pos = self.getPos(el) - return self.getter[pos](self, pos) - - def __setitem__(self, el, val): - pos = self.getPos(el) - - if val > 0xFFFFFFFF: - raise ValueError("Value is larger than size of the register") - val = c_uint32(val) - self.setter[pos](self, pos, val) - self.lastChange = pos - - - def __str__(self): - - out = " " - out += "-" * 67 - out += "\n" - for i,d in enumerate(self.data): - out += "->" if i == self.lastChange else " " - out += f" | {d.value:#13} | {d.value:#010x} | {d.value:#034b} |" - out += "\n" - out += " " - out += "-" * 67 - out += "\n" - return out - - -if __name__ == "__main__": - Regs = RegistersRV32I() - Regs[0] = 1 - Regs["x1"] = 10 - print(Regs) diff --git a/pysc-v/registers/__init__.py b/pysc-v/registers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyscv/Frontend/__init__.py b/pyscv/Frontend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscv/Frontend/lexer.py b/pyscv/Frontend/lexer.py new file mode 100644 index 0000000..f7ca18a --- /dev/null +++ b/pyscv/Frontend/lexer.py @@ -0,0 +1,292 @@ +# TODO Logging is interesting for debugging purposes, decide what to do with it +from .logger import newlogger +logger = newlogger(__name__) +#import logging +#logger.setLevel(logging.WARN) + + +###### + +from enum import Enum +from .reader import Reader + +binChars = set("01") +octChars = set("01234567") +decChars = set("0123456789") +hexChars = set("0123456789ABCDEFabcdef") + +class TokenType(Enum): + """ + These are the possible tokens that the lexer knows, they are converted to + their most accurate representation in python. + """ + identifier = 0 # Represented as strings + label = 1 # Represented as strings or integer if they are numeric + instruction = 2 # Represented as strings + directive = 3 # Represented as strings + integer = 4 # Represented as int + character = 5 # Represented as strings of length 1 + string = 6 # Represented as strings + float = 7 # Represented as ?? + # (we need correct conversions to the binary value) + # TODO + end = 8 + argsep = 9 + openparens = 10 + closeparens = 11 + +class Lexer: + def __init__(self, reader): + self.reader = reader + self.tokenstart = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.reader.char is None: + self.reader.advance() + while True: + try: + # Instruction end + if self.reader.char == "\n" or self.reader.char == ";": + self.reader.advance() + return (TokenType.end, None) + + # Spaces + elif self.reader.char.isspace(): + self.reader.advance() + + # Argument separator + elif self.reader.char == ",": + self.reader.advance() + return (TokenType.argsep, None) + + # String + elif self.reader.char == '"': + return self.string() + + # Character + elif self.reader.char == "'": + return self.character() + + # Comment + elif self.reader.char == "#": + self.comment() + + # Parenthesis + # Load a register as an address + an offset + elif self.reader.char == '(': + self.reader.advance() + return (TokenType.openparens, None) + elif self.reader.char == ')': + self.reader.advance() + return (TokenType.closeparens, None) + + # Starts with digit: + # - Numbers (any kind) + # - Numeric Labels + # - Numeric Label references + # - Load register as address: ld a1, 4(a0) + # ^^^^^ + # offset + reg + elif self.reader.char.isdigit() or self.reader.char == "-": + return self.number() + + # Identifiers or labels + elif self.reader.char.isalpha() or self.reader.char == "_": + return self.identifier() + + # Directive + elif self.reader.char == "." and self.reader.peek().isalpha(): + return self.directive() + + elif self.reader.char == "": + break # FILE END + + else: + raise ValueError("Don't know how to lex") + + except Exception as e: + # Handle exceptions + # raise StopIteration + raise e + raise StopIteration + + def string (self): + logger.info("Found string") + self.reader.advance() # Ignore opening quotes + + string = "" + escaped = False # Set if previous character was a backslash + + while self.reader.char != '"' or escaped: + if self.reader.char == "": + # TODO: Check how to do this + raise ValueError("Error: string not closed, found EOF") + if self.reader.char == "\n": + # TODO: Consider the string as closed and continue but report + # the error? + raise ValueError("Error: string not closed, found newline") + + if escaped: + string += self.escaped_char(self.reader.char) + logger.debug("Escape sequence processed %s", + string[-1].__repr__()) + escaped = False + continue + if self.reader.char == "\\": + escaped = True + self.reader.advance() + continue + escaped = False + string += self.reader.char + self.reader.advance() + logger.info("Lexed string %s", string.__repr__()) + self.reader.advance() # Discard closing " + return (TokenType.string, string) + + def character (self): + logger.info("Found character") + self.reader.advance() # Ignore the opening quote + character = self.reader.char + if character == "": + # TODO: Check how to do this + raise ValueError("Error: found EOF") + if not character.isprintable() : + raise ValueError("Error: Non printable character") + + if character == "\\": + self.reader.advance() + character = self.escaped_char(self.reader.char) + logger.debug("Escape sequence processed: %s", character.__repr__()) + + # Make sure it's correctly closed + self.reader.advance() + if self.reader.char != "'": + raise ValueError("Parse error: expected `'`, found " + self.reader.char) + self.reader.advance() # Discard closing ' + + logger.info("Lexed char %s", character.__repr__()) + return (TokenType.character, character) + + def comment(self): + while self.reader != "\n": + self.reader.advance() + + def escaped_char(self, ch): + if ch == '"': + return '"' + elif ch == 'n': + return '\n' + elif ch == 't': + return '\t' + elif ch == '\\': + return '\\' + else: + # TODO: implement more escape sequences + return "" + + + def number(self): + """ + Process anything that starts with a number. Could be: + - An actual number in hex, octal, binary or decimal + - Numeric labels or numeric label references + - floating point (not implemented yet) + """ + numbstr = self.reader.char + self.reader.advance() + + logger.info("Found number: %s", numbstr) + + # Hex, Bin, Oct and the 0 + if numbstr == "0": + reprid = self.reader.peek() + if reprid == "x": + self.reader.advance() + return self.hex() + elif reprid == "b": + self.reader.advance() + return self.bin() + elif reprid.isdigit(): + return self.oct() + else: + return (TokenType.integer, 0) + + # TODO: floating point numbers + + # Decimal number + # consume a decimal number + while self.reader.char in decChars: + numbstr += self.reader.char + self.reader.advance() + + # Numeric label reference + if self.reader.char in set("bf"): + numbstr += self.reader.char + self.reader.advance() + return (TokenType.identifier, numbstr) + # TODO: Return identifier or label? + # It's a label-ref not a label! + + # Numeric label + if self.reader.char in ":": + self.reader.advance() + return (TokenType.label, numbstr) + + return (TokenType.integer, int(numbstr)) + + def hex(self): + # TODO: Handle possible errors + numbstr = "" + while self.reader.char in hexChars: + numbstr += self.reader.char + self.reader.advance() + return (TokenType.integer, int(numbstr, 16)) + + def oct(self): + # TODO: Handle possible errors + numbstr = "" + while self.reader.char in octChars: + numbstr += self.reader.char + self.reader.advance() + return (TokenType.integer, int(numbstr, 8)) + + def bin(self): + # TODO: Handle possible errors + numbstr = "" + while self.reader.char in binChars: + numbstr += self.reader.char + self.reader.advance() + return (TokenType.integer, int(numbstr, 2)) + + + def identifier(self): + logger.info("Found identifier") + s = "" + while self.reader.char.isalnum() or self.reader.char == "_": + s += self.reader.char + self.reader.advance() + + if self.reader.char == ":": + self.reader.advance() + return (TokenType.label, s) + else: + return (TokenType.identifier, s) + + def directive(self): + logger.info("Found directive") + s = "" + self.reader.advance() # Discard leading dot + while self.reader.char.isalnum() or self.reader.char == "_": + s += self.reader.char + self.reader.advance() + return (TokenType.directive, s) + +if __name__ == "__main__": + import sys + with Reader(sys.argv[1]) as src: + lexer = Lexer(src) + for token in lexer: + print(token) diff --git a/pyscv/Frontend/logger.py b/pyscv/Frontend/logger.py new file mode 100644 index 0000000..a0f76e1 --- /dev/null +++ b/pyscv/Frontend/logger.py @@ -0,0 +1,10 @@ +import logging +import sys + +logging.basicConfig() + + +def newlogger(name): + logger = logging.getLogger(name) + logger.setLevel(level=logging.DEBUG) + return logger diff --git a/pyscv/Frontend/parser.py b/pyscv/Frontend/parser.py new file mode 100644 index 0000000..b5acfbb --- /dev/null +++ b/pyscv/Frontend/parser.py @@ -0,0 +1 @@ +from InstructionSets import RV32I, RV32C, RV32F, RV32D diff --git a/pyscv/Frontend/reader.py b/pyscv/Frontend/reader.py new file mode 100644 index 0000000..a2a280b --- /dev/null +++ b/pyscv/Frontend/reader.py @@ -0,0 +1,51 @@ +# TODO Logging is interesting for debugging purposes, decide what to do with it +from .logger import newlogger +logger = newlogger(__name__) +#import logging +#logger.setLevel(logging.WARN) + +class Reader: + def __init__(self, filename): + self.filename = filename + self._file = None + self.lineno = 0 + self.charno = 0 + self.char = None + + def open(self): + self._file = open(self.filename, "r") + return + def close(self): + self._file.close() + return + + def __enter__(self): + self.open() + return self + + def __exit__(self, type, value, tb): + self.close() + + def advance(self): + self.charno += 1 + if self.char == "\n": + self.lineno += 1 + self.charno = 0 + self.char = self._file.read(1) + + logger.debug("Read: %s" % self.char.__repr__()) + return + + def peek(self): + pos = self._file.tell() + ch = self._file.read(1) + self._file.seek(pos) + return ch + + +if __name__ == "__main__": + with Reader(__file__ ) as src: + print(src.peek()) + while src.peek() != "": + src.advance() + print(src.char) diff --git a/pyscv/InstructionSets/RV32C.py b/pyscv/InstructionSets/RV32C.py new file mode 100644 index 0000000..e740ac4 --- /dev/null +++ b/pyscv/InstructionSets/RV32C.py @@ -0,0 +1,29 @@ +from instructions import Instruction, InstructionSet + +class Compressed(Instruction): + size = 2 + + +class CR(Compressed): + pass + +class CI(Compressed): + pass + +class CSS(Compressed): + pass + +class CIW(Compressed): + pass + +class CJ(Compressed): + pass + +class CB(Compressed): + pass + +class CL(Compressed): + pass + +class CS(Compressed): + pass diff --git a/pyscv/InstructionSets/RV32D.py b/pyscv/InstructionSets/RV32D.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscv/InstructionSets/RV32F.py b/pyscv/InstructionSets/RV32F.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscv/InstructionSets/RV32I.py b/pyscv/InstructionSets/RV32I.py new file mode 100644 index 0000000..3fa02ad --- /dev/null +++ b/pyscv/InstructionSets/RV32I.py @@ -0,0 +1,466 @@ +from .instructions import Instruction, InstructionSet +from ctypes import c_uint32 + +RV32I = InstructionSet() + +class R(Instruction): + funct3 = None + funct7 = None + opcode = None + def __init__(self, rd, rs1, rs2): + self.rd = rd + self.rs1 = rs1 + self.rs2 = rs2 + + def compile(self): + # TODO: ensure sizes and convert register names to number... + return c_uint32( + (self.funct7 << 25) +\ + (self.rs2 << 20) +\ + (self.rs1 << 15) +\ + (self.funct3 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + +class I(Instruction): + funct3 = None + opcode = None + + def __init__(self, rd, rs, imm): + self.rd = rd + self.rs = rs + self.imm = imm + + def compile(self): + return c_uint32( + (self.imm << 20) +\ + (self.rs << 15) +\ + (self.funct3 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + +class S(Instruction): + funct3 = None + opcode = None + + def __init__(self, rs1, rs2, imm): + self.rs1 = rs1 + self.rs2 = rs2 + self.imm = imm + + def compile(self): + imm_0_4 = self.imm & 0x1F + imm_5_11 = (self.imm & 0xFE0)>>5 + return c_uint32( + (imm_5_11 << 25) +\ + (self.rs1 << 20) +\ + (self.rs2 << 15) +\ + (self.funct3 << 12) +\ + (imm_0_4 << 7) +\ + self.opcode + ) + +class B(Instruction): + funct3 = None + opcode = None + + def __init__(self, rs1, rs2, imm): + self.rs1 = rs1 + self.rs2 = rs2 + self.imm = imm + + def compile(self): + # NOTE: The lowest bit of the imm is always 0 because instructions + # are at least 16 bits wide, so it's not used, that's why this + # instruction looks that weird + imm_12 = (self.imm & 0b1000000000000)>>12 + imm_11 = (self.imm & 0b0100000000000)>>11 + imm_5_10 = (self.imm & 0b0011111100000)>>5 + imm_1_4 = (self.imm & 0b0000000011110)>>1 + return c_uint32( + (imm_12 << 31) +\ + (imm_5_10 << 25) +\ + (self.rs2 << 20) +\ + (self.rs1 << 15) +\ + (self.funct3 << 12) +\ + (imm_1_4 << 8) +\ + (imm_11 << 7) +\ + self.opcode + ) + + def patch(imm): + self.imm = imm + + +class U(Instruction): + opcode = None + def __init__(self, rd, imm): + self.rd = rd + self.imm = imm + + def compile(self): + # NOTE: U Type is for AUIPC and LUI that only use the high part of the + # immediate + imm_12_32 = (self.imm & 0xFFFFF000)>>12 + return c_uint32( + (imm_12_32 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + +class J(Instruction): + opcode = None + + def __init__(self, rd, imm): + self.rd = rd + self.imm = imm + + def compile(self): + # NOTE: Jumps are also weird + imm_20 = (self.imm & 0x100000)>>20 + imm_12_19 = (self.imm & 0x0FF000)>>12 + imm_11 = (self.imm & 0x000800)>>11 + imm_1_10 = (self.imm & 0x0007FE)>>1 + return c_uint32( + (imm_20 << 31) +\ + (imm_1_10 << 21) +\ + (imm_11 << 20) +\ + (imm_12_19 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + + def patch(imm): + self.imm = imm + + + +@RV32I.instruction +class lui(U): + name = "lui" + opcode = 0b0110111 + +@RV32I.instruction +class auipc(U): + name = "auipc" + opcode = 0b0010111 + +@RV32I.instruction +class jal(J): + name = "jal" + opcode = 0b1101111 + + def execute(self, pc): + # TODO + # - Save current pc in rd + # - Make pc from `imm` + # - Return new pc + return pc + +@RV32I.instruction +class jalr(I): + name = "jalr" + opcode = 0b1100111 + funct3 = 0b000 + +@RV32I.instruction +class beq(B): + name = "beq" + opcode = 0b1100011 + funct3 = 0b000 + +@RV32I.instruction +class bne(B): + name = "bne" + opcode = 0b1100011 + funct3 = 0b001 + +@RV32I.instruction +class blt(B): + name = "blt" + opcode = 0b1100011 + funct3 = 0b100 + +@RV32I.instruction +class bge(B): + name = "bge" + opcode = 0b1100011 + funct3 = 0b101 + +@RV32I.instruction +class bltu(B): + name = "bltu" + opcode = 0b1100011 + funct3 = 0b110 + +@RV32I.instruction +class bgeu(B): + name = "bgeu" + opcode = 0b1100011 + funct3 = 0b111 + +@RV32I.instruction +class lb(I): + name = "lb" + opcode = 0b0000011 + funct3 = 0b000 + +@RV32I.instruction +class lh(I): + name = "lh" + opcode = 0b0000011 + funct3 = 0b001 + +@RV32I.instruction +class lw(I): + name = "lw" + opcode = 0b0000011 + funct3 = 0b010 + +@RV32I.instruction +class lbu(I): + name = "lbu" + opcode = 0b0000011 + funct3 = 0b100 + +@RV32I.instruction +class lhu(I): + name = "lhu" + opcode = 0b0000011 + funct3 = 0b101 + + +@RV32I.instruction +class sb(S): + name = "sb" + opcode = 0b0100011 + funct3 = 0b000 + +@RV32I.instruction +class sh(S): + name = "sh" + opcode = 0b0100011 + funct3 = 0b001 + +@RV32I.instruction +class sw(S): + name = "sw" + opcode = 0b0100011 + funct3 = 0b010 + +@RV32I.instruction +class addi(I): + name = "addi" + opcode = 0b0010011 + funct3 = 0b000 + + def execute(self, pc): + # TODO + return pc + self.size + +@RV32I.instruction +class slti(I): + name = "slti" + opcode = 0b0010011 + funct3 = 0b010 + +@RV32I.instruction +class sltiu(I): + name = "sltiu" + opcode = 0b0010011 + funct3 = 0b011 + +@RV32I.instruction +class xori(I): + name = "xori" + opcode = 0b0010011 + funct3 = 0b100 + +@RV32I.instruction +class ori(I): + name = "ori" + opcode = 0b0010011 + funct3 = 0b110 + +@RV32I.instruction +class andi(I): + name = "andi" + opcode = 0b0010011 + funct3 = 0b111 + + +class ShiftImm(I): + # NOTE: This is an special type used for shifting operations because they + # have 7 bits left after the maximum shift (5bits -> 32 rotations) + # they can apply. + # In RV64I they can indicate rotation with 1 bit more (64 rotations) so + # they use a funct6 instead. + funct7 = None + funct3 = None + opcode = None + + def __init__(self, rd, rs, imm): + self.rd = rd + self.rs = rs + self.imm = imm + + def compile(self): + return c_uint32( + (self.funct7 << 25) +\ + (self.imm << 20) +\ + (self.rs << 15) +\ + (self.funct3 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + +@RV32I.instruction +class slli(ShiftImm): + name = "slli" + opcode = 0b0010011 + funct3 = 0b001 + funct7 = 0b0000000 + +@RV32I.instruction +class srli(ShiftImm): + name = "srli" + opcode = 0b0010011 + funct3 = 0b101 + funct7 = 0b0000000 + +@RV32I.instruction +class srai(ShiftImm): + name = "srai" + opcode = 0b0010011 + funct3 = 0b101 + funct7 = 0b0100000 + + +@RV32I.instruction +class add(R): + name = "add" + opcode = 0b0110011 + funct3 = 0b000 + funct7 = 0b0000000 + + def execute(self, pc): + # TODO + return pc + self.size + +@RV32I.instruction +class sub(R): + name = "sub" + opcode = 0b0110011 + funct3 = 0b000 + funct7 = 0b0100000 + +@RV32I.instruction +class sll(R): + name = "sll" + opcode = 0b0110011 + funct3 = 0b001 + funct7 = 0b0000000 + +@RV32I.instruction +class slt(R): + name = "slt" + opcode = 0b0110011 + funct3 = 0b010 + funct7 = 0b0000000 + +@RV32I.instruction +class sltu(R): + name = "sltu" + opcode = 0b0110011 + funct3 = 0b011 + funct7 = 0b0000000 + +@RV32I.instruction +class xor(R): + name = "xor" + opcode = 0b0110011 + funct3 = 0b100 + funct7 = 0b0000000 + +@RV32I.instruction +class srl(R): + name = "srl" + opcode = 0b0110011 + funct3 = 0b101 + funct7 = 0b0000000 + +@RV32I.instruction +class sra(R): + name = "sra" + opcode = 0b0110011 + funct3 = 0b101 + funct7 = 0b0100000 + +@RV32I.instruction +class _or(R): + name = "or" + opcode = 0b0110011 + funct3 = 0b110 + funct7 = 0b0000000 + +@RV32I.instruction +class _and(R): + name = "and" + opcode = 0b0110011 + funct3 = 0b111 + funct7 = 0b0000000 + +@RV32I.instruction +class ecall(I): + name = "ecall" + opcode = 0b1110011 + funct3 = 0b000 + + def __init__(self): + # NOTE: ecall is a I-type instruction but doesn't get any arg and sets + # every field to 0 + self.rd = 0b00000 + self.rs = 0b00000 + self.imm = 0b000000000000 + + +@RV32I.instruction +class ebreak(I): + name = "ebreak" + opcode = 0b1110011 + funct3 = 0b000 + + def __init__(self): + # NOTE: ebreak is a I-type instruction but doesn't get any arg and pre- + # -sets every field to a fixed value + self.rd = 0b00000 + self.rs = 0b00000 + self.imm = 0b000000000001 + + + + + + + + + + + + + + + + +@RV32I.pseudoinstruction +class j(J): + name = "j" + def __new__(cls, imm): + return jal("x0", imm) + + +if __name__ == "__main__": + print(RV32I) + print(RV32I.instructions) diff --git a/pyscv/InstructionSets/RV64I.py b/pyscv/InstructionSets/RV64I.py new file mode 100644 index 0000000..72b8bcb --- /dev/null +++ b/pyscv/InstructionSets/RV64I.py @@ -0,0 +1,63 @@ +from .instructions import Instruction, InstructionSet +from ctypes import c_uint32 +from .RV32I import * + +RV64I = InstructionSet() + +class ShiftImm64(ShiftImm): + # NOTE: This is an special type used for shifting operations because they + # have 7 bits left after the maximum shift (5bits -> 32 rotations) + # they can apply. + # In RV64I they can indicate rotation with 1 bit more (64 rotations) so + # they use a funct6 instead. + funct6 = None + funct3 = None + opcode = None + + def __init__(self, rd, rs, imm): + self.rd = rd + self.rs = rs + self.imm = imm + + def compile(self): + return c_uint32( + (self.funct6 << 26) +\ + (self.imm << 20) +\ + (self.rs << 15) +\ + (self.funct3 << 12) +\ + (self.rd << 7) +\ + self.opcode + ) + +@RV64I.instruction +class slli(ShiftImm64): + name = "slli" + opcode = 0b0010011 + funct3 = 0b001 + funct6 = 0b000000 + +@RV64I.instruction +class srli(ShiftImm64): + name = "srli" + opcode = 0b0010011 + funct3 = 0b101 + funct6 = 0b000000 + +@RV64I.instruction +class srai(ShiftImm64): + name = "srai" + opcode = 0b0010011 + funct3 = 0b101 + funct6 = 0b010000 + +@RV64I.instruction +class sd(S): + name = "sd" + opcode = 0b0100011 + funct3 = 0b011 + +@RV64I.instruction +class ld(I): + name = "ld" + opcode = 0b0000011 + funct3 = 0b011 diff --git a/pyscv/InstructionSets/__init__.py b/pyscv/InstructionSets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyscv/InstructionSets/instructions.py b/pyscv/InstructionSets/instructions.py new file mode 100644 index 0000000..00a3146 --- /dev/null +++ b/pyscv/InstructionSets/instructions.py @@ -0,0 +1,36 @@ +from ctypes import c_uint32 + + +class Instruction: + size = 4 # Instruction size in bytes + + def __init__(self): + pass + + def compile(self): + # return the binstream of the instruction in a c_uint32 + pass + + def execute(self, pc): + # executes the instruction and returns the next program counter + return + +class InstructionSet: + + def __init__(self, init=None): + self.instructions = dict() + + def instruction(self, ins): + if ins.name not in self.instructions: + self.instructions[ins.name] = ins + return ins + + # NOTE: We don't need to treat pseudoinstructions in an special way yet, + # but we separate the decorator for clarity + pseudoinstruction = instruction + + +if __name__ == "__main__": + # TODO This is the interface i'd love to have + addins = add("x5","x2","zero") + j("labelName") diff --git a/pyscv/main.py b/pyscv/main.py new file mode 100644 index 0000000..8cdc08f --- /dev/null +++ b/pyscv/main.py @@ -0,0 +1,8 @@ +# Use a generator for the execution flow +# -> PC has to be a global variable, updated by each instruction to the next val +# So user can set the PC by hand and call next(run) and make the code jump! + +pc = 0 +while True: + inst = fetch_instruction(pc) + pc = inst.execute() diff --git a/pyscv/memory.py b/pyscv/memory.py new file mode 100644 index 0000000..84bf0bd --- /dev/null +++ b/pyscv/memory.py @@ -0,0 +1,112 @@ +from ctypes import c_uint8, c_uint16, c_uint32 + +class Addressable: + def __init__(self, start=0, end=0): + self.start = start + self.end = end + def addr_to_pos(self, addr): + if not (self.start <= addr < self.end): + raise KeyError("Address out of space") + return addr - self.start + +def merge_bytes (byte_iter): + val = 0 + for i,v in enumerate(byte_ouput): + val += v.value << 8 * i + return val +def split_bytes (val, byte_count): + return tuple( c_uint8(val >> 8 * i) for i in range(byte_count) ) + +class Memory32 (Addressable): + """ + This is a raw 32-bit word memory addressable at a byte level. + Internally it is defined as c_uint8 list. + Special functions are needed to access halfs (c_uint16) and words + (c_uint32). + """ + + def __init__(self, start=0, end=0, bigEndian=False): + super().__init__(start, end) + self.bigEndian = bigEndian + self.lastChange = None + # Pre-allocate or allocate or allocate per write? + self.data = [None] * (end - start) + + def get_byte(self, addr): + return self.data[ self.addr_to_pos(addr) ] + def set_byte(self, addr, val): + if not isinstance(val, c_uint8): + if val > 0xFF: + raise ValueError("Value is larger than a byte") + self.data[ self.addr_to_pos(addr) ] = val if isinstance(val, c_uint8) else c_uint8(val) + self.lastChange = range(addr, addr+1) + + + def get_half(self,addr): + byte_output = (get_byte(addr), get_byte(addr+1)) + if self.bigEndian: + byte_output = reversed(byte_output) + return c_uint16( merge_bytes(byte_output) ) + + def set_half(self, addr, val): + if not isinstance(val, c_uint16): + if val > 0xFFFF: + raise ValueError("Value is larger than a half") + bytes = split_bytes(val, 2) + if self.bigEndian: + bytes = reversed(bytes) + for i,v in enumerate(bytes): + self.set_byte(addr+i, v) + self.lastChange = range(addr,addr+2) + + def get_word(self, pos): + byte_output = tuple(get_byte(addr+i) for i in range(4)) + if self.bigEndian: + byte_output = reversed(byte_output) + return c_uint32( merge_bytes(byte_output) ) + + def set_word(self, addr, val): + if not isinstance(val, c_uint32): + if val > 0xFFFFFFFF: + raise ValueError("Value is larger than a word") + bytes = split_bytes(val, 4) + if self.bigEndian: + bytes = reversed(bytes) + for i,v in enumerate(bytes): + self.set_byte(addr+i, v) + self.lastChange = range(addr,addr+4) + + def __str__(self): + out = " " + out += "-" * 50 + out += "\n" + for i,d in enumerate(self.data): + if d is not None: + addr = i+self.start + out += "->" if addr in self.lastChange else " " + out += f"| {addr:#20x} | {d.value:#5} | {d.value:#04x} | {d.value:#010b} |" + out += "\n" + out += " " + out += "-" * 50 + out += "\n" + return out + + +class Memory32RO (Memory32): + def set_byte(self, addr, val): + raise NotImplementedError("Trying to write in a read-only memory") + def set_half(self, addr, val): + raise NotImplementedError("Trying to write in a read-only memory") + def set_word(self, addr, val): + raise NotImplementedError("Trying to write in a read-only memory") + +class CodeMemory32 (Memory32RO): + def __init__(self, start=0, end=0): + super().__init__(writable=False,start=start,end=end) + + +if __name__ == "__main__": + m = Memory32(start=100,end=200) + print(m.start, m.end) + m.set_byte(100, 246) + print(m) diff --git a/pyscv/registers/RV32F.py b/pyscv/registers/RV32F.py new file mode 100644 index 0000000..8e3fd28 --- /dev/null +++ b/pyscv/registers/RV32F.py @@ -0,0 +1,45 @@ +from RV32I import RegistersRV32I + +class RegistersRV32F(RegistersRV32I): + + def __init__(self): + self.names = {} + self.data = [] + + self.setter = [] + self.getter = [] + + self.lastChange = None + + self.addRegister(("f0","ft0")) + self.addRegister(("f1","ft1")) + self.addRegister(("f2","ft2")) + self.addRegister(("f3","ft3")) + self.addRegister(("f4","ft4")) + self.addRegister(("f5","ft5")) + self.addRegister(("f6","ft6")) + self.addRegister(("f7","ft7")) + self.addRegister(("f8","fs0")) + self.addRegister(("f9","fs1")) + self.addRegister(("f10","a0")) + self.addRegister(("f11","a1")) + self.addRegister(("f12","a2")) + self.addRegister(("f13","a3")) + self.addRegister(("f14","a4")) + self.addRegister(("f15","a5")) + self.addRegister(("f16","a6")) + self.addRegister(("f17","a7")) + self.addRegister(("f18","fs2")) + self.addRegister(("f19","fs3")) + self.addRegister(("f20","fs4")) + self.addRegister(("f21","fs5")) + self.addRegister(("f22","fs6")) + self.addRegister(("f23","fs7")) + self.addRegister(("f24","fs8")) + self.addRegister(("f25","fs9")) + self.addRegister(("f26","fs10")) + self.addRegister(("f27","fs11")) + self.addRegister(("f28","ft8")) + self.addRegister(("f29","ft9")) + self.addRegister(("f30","ft10")) + self.addRegister(("f31","ft11")) diff --git a/pyscv/registers/RV32I.py b/pyscv/registers/RV32I.py new file mode 100644 index 0000000..a723064 --- /dev/null +++ b/pyscv/registers/RV32I.py @@ -0,0 +1,117 @@ +# Register block of RV32I +# Set and get using [] with the name of the register (lowercase) or the +# position +from ctypes import c_uint32 + +def defaultGetter(self, pos): + return self.data[pos] + +def defaultSetter(self, pos, val): + self.data[pos] = val + +def zeroSetter(self, pos, val): + # hardwired to zero + self.data[pos] = c_uint32(0) + + +class RegistersRV32I: + + def __init__(self): + self.names = {} + self.data = [] + + self.setter = [] + self.getter = [] + + self.lastChange = None + + self.addRegister(("x0","zero"), setter = zeroSetter) + self.addRegister(("x1","ra")) + self.addRegister(("x2","sp", "v0")) + self.addRegister(("x3","gp", "v1")) + self.addRegister(("x4","tp")) + self.addRegister(("x5","t0")) + self.addRegister(("x6","t1")) + self.addRegister(("x7","t2")) + self.addRegister(("x8","s0","fp")) + self.addRegister(("x9","s1")) + self.addRegister(("x10","a0")) + self.addRegister(("x11","a1")) + self.addRegister(("x12","a2")) + self.addRegister(("x13","a3")) + self.addRegister(("x14","a4")) + self.addRegister(("x15","a5")) + self.addRegister(("x16","a6")) + self.addRegister(("x17","a7")) + self.addRegister(("x18","s2")) + self.addRegister(("x19","s3")) + self.addRegister(("x20","s4")) + self.addRegister(("x21","s5")) + self.addRegister(("x22","s6")) + self.addRegister(("x23","s7")) + self.addRegister(("x24","s8")) + self.addRegister(("x25","s9")) + self.addRegister(("x26","s10")) + self.addRegister(("x27","s11")) + self.addRegister(("x28","t3")) + self.addRegister(("x29","t4")) + self.addRegister(("x30","t5")) + self.addRegister(("x31","t6")) + + + def addRegister(self, names, getter=defaultGetter, setter=defaultSetter): + for name in names: + if name in self.names: + raise KeyError("Register name already in use") + + currentpos = len(self.data) + for name in names: + self.names[name] = currentpos + self.data.append(c_uint32(0)) + + self.getter.append(getter) + self.setter.append(setter) + + def getPos(self, el): + if isinstance(el, str): + pos = self.names[el] + elif isinstance(el, int): + pos = el + else: + raise ValueError("Wrong type of register id. Must be str or int") + return pos + + def __getitem__(self, el): + pos = self.getPos(el) + return self.getter[pos](self, pos) + + def __setitem__(self, el, val): + pos = self.getPos(el) + + if val > 0xFFFFFFFF: + raise ValueError("Value is larger than size of the register") + val = c_uint32(val) + self.setter[pos](self, pos, val) + self.lastChange = pos + + + def __str__(self): + + out = " " + out += "-" * 67 + out += "\n" + for i,d in enumerate(self.data): + out += "->" if i == self.lastChange else " " + out += f" | {d.value:#13} | {d.value:#010x} | {d.value:#034b} |" + out += "\n" + out += " " + out += "-" * 67 + out += "\n" + return out + + +if __name__ == "__main__": + Regs = RegistersRV32I() + Regs[0] = 1 + Regs["x1"] = 10 + print(Regs) diff --git a/pyscv/registers/__init__.py b/pyscv/registers/__init__.py new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3