summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- pysc-v/Frontend/lexer.py 273
-rw-r--r-- pysc-v/Frontend/logger.py 10
-rw-r--r-- pysc-v/Frontend/parser.py 1
-rw-r--r-- pysc-v/Frontend/reader.py 33
4 files changed, 305 insertions, 12 deletions
diff --git a/pysc-v/Frontend/lexer.py b/pysc-v/Frontend/lexer.py
index 5175097..c4a9a7d 100644
--- a/pysc-v/Frontend/lexer.py
+++ b/pysc-v/Frontend/lexer.py
@@ -1,4 +1,269 @@
-# Returns:
-# - label
-# - instruction (with args)
-# - directive (with args)
+# TODO Logging is interesting for debugging purposes, decide what to do with it
+from logger import newlogger
+logger = newlogger(__name__)
+#import logging
+#logger.setLevel(logging.WARN)
+
+
+######
+
+from enum import Enum
+from reader import Reader
+
+# Characters accepted as digits in each numeric base. Every base extends the
+# previous one, so each set is built on top of the smaller one.
+binChars = set("01")
+octChars = binChars | set("234567")
+decChars = octChars | set("89")
+hexChars = decChars | set("ABCDEFabcdef")
+
+class TokenType(Enum):
+ """
+ Kinds of token the lexer can emit. Every token is a `(TokenType, value)`
+ tuple whose value is converted to its most accurate representation in
+ python.
+ """
+ identifier = 0 # Represented as strings (also used for numeric label references)
+ label = 1 # Represented as strings (numeric labels keep their digits as text)
+ instruction = 2 # Represented as strings (not emitted by the lexer yet)
+ directive = 3 # Represented as strings (not emitted by the lexer yet)
+ integer = 4 # Represented as int
+ character = 5 # Represented as strings of length 1
+ string = 6 # Represented as strings
+ float = 7 # Represented as ??
+ # (we need correct conversions to the binary value)
+ # TODO
+ end = 8 # End of instruction (newline or `;`), the value is None
+ argsep = 9 # Argument separator `,`, the value is None
+ openparens = 10 # `(`, the value is None
+ closeparens = 11 # `)`, the value is None
+
+class Lexer:
+    """
+    Iterator that splits the characters served by a reader into tokens.
+
+    Every token is a `(TokenType, value)` tuple. The reader must provide the
+    current character in `char` (`None` before the first read, `""` at the
+    end of the input) and an `advance()` method that loads the next one.
+    `peek()` is assumed to return the character that follows the current one
+    without consuming it.
+    """
+
+    def __init__(self, reader):
+        self.reader = reader
+        self.tokenstart = 0
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """
+        Return the next token.
+
+        Comments and characters that do not start any token are skipped.
+        Raises StopIteration at the end of the input and ValueError when a
+        literal is malformed.
+        """
+        reader = self.reader
+        if reader.char is None:
+            # Nothing has been read yet, load the first character
+            reader.advance()
+
+        while reader.char != "":
+            ch = reader.char
+
+            # Instruction end
+            if ch == "\n" or ch == ";":
+                reader.advance()
+                return (TokenType.end, None)
+
+            # Argument separator
+            if ch == ",":
+                reader.advance()
+                return (TokenType.argsep, None)
+
+            # String
+            if ch == '"':
+                return self.string()
+
+            # Character
+            if ch == "'":
+                return self.character()
+
+            # Parenthesis
+            # Load a register as an address + an offset
+            if ch == "(":
+                reader.advance()
+                return (TokenType.openparens, None)
+            if ch == ")":
+                reader.advance()
+                return (TokenType.closeparens, None)
+
+            # Starts with digit (or a sign):
+            # - Numbers (any kind)
+            # - Numeric Labels
+            # - Numeric Label references
+            # - Load register as address: ld a1, 4(a0)
+            #                                    ^^^^^
+            #                                    offset + reg
+            # Only ASCII digits are accepted: str.isdigit() is also true for
+            # characters such as "²" that int() cannot convert.
+            if ch in decChars or ch == "-":
+                return self.number()
+
+            # Identifiers
+            if ch.isalpha():
+                return self.identifier()
+
+            if ch == "#":
+                # Comment: skip it and keep looking for a token
+                self.comment()
+            else:
+                # TODO: Remove this, it's just for testing
+                reader.advance()
+
+        # FILE END
+        raise StopIteration
+
+    def string(self):
+        """
+        Lex a double quoted string and return `(TokenType.string, text)` with
+        the escape sequences already replaced.
+
+        Raises ValueError if the input or the line ends before the closing
+        quote.
+        """
+        logger.info("Found string")
+        self.reader.advance()  # Ignore opening quotes
+
+        chunks = []
+        escaped = False  # Set if previous character was a backslash
+
+        while escaped or self.reader.char != '"':
+            ch = self.reader.char
+            if ch == "":
+                # TODO: Check how to do this
+                raise ValueError("Error: string not closed, found EOF")
+            if ch == "\n":
+                # TODO: Consider the string as closed and continue but report
+                # the error?
+                raise ValueError("Error: string not closed, found newline")
+
+            if escaped:
+                chunks.append(self.escaped_char(ch))
+                logger.debug("Escape sequence processed %s", repr(chunks[-1]))
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            else:
+                chunks.append(ch)
+            # Every branch consumes the character it has just handled. The
+            # escaped one too, otherwise it would be lexed a second time as
+            # a plain character (or as the closing quote).
+            self.reader.advance()
+
+        string = "".join(chunks)
+        logger.info("Lexed string %s", repr(string))
+        self.reader.advance()  # Discard closing "
+        return (TokenType.string, string)
+
+    def character(self):
+        """
+        Lex a single quoted character and return
+        `(TokenType.character, char)`.
+
+        Raises ValueError on end of input, on a non printable character or
+        if the closing quote is missing.
+        """
+        logger.info("Found character")
+        self.reader.advance()  # Ignore the opening quote
+        character = self.reader.char
+        if character == "":
+            # TODO: Check how to do this
+            raise ValueError("Error: found EOF")
+        if not character.isprintable():
+            raise ValueError("Error: Non printable character")
+
+        if character == "\\":
+            self.reader.advance()
+            if self.reader.char == "":
+                raise ValueError("Error: found EOF")
+            character = self.escaped_char(self.reader.char)
+            logger.debug("Escape sequence processed: %s", repr(character))
+
+        # Make sure it's correctly closed
+        self.reader.advance()
+        if self.reader.char != "'":
+            raise ValueError("Parse error: expected `'`, found " + self.reader.char)
+        self.reader.advance()  # Discard closing '
+
+        logger.info("Lexed char %s", repr(character))
+        return (TokenType.character, character)
+
+    def comment(self):
+        """
+        Skip a comment up to, but not including, the end of the line, so the
+        newline is still reported as an instruction end.
+        """
+        # Look at the current character (not at the reader object) and stop
+        # at the end of the input too, otherwise this never terminates.
+        while self.reader.char not in ("\n", ""):
+            self.reader.advance()
+
+    def escaped_char(self, ch):
+        """
+        Return the character represented by the escape sequence `\\ch`, or an
+        empty string if the sequence is not known.
+        """
+        escapes = {
+            '"': '"',
+            "'": "'",
+            "\\": "\\",
+            "n": "\n",
+            "t": "\t",
+            "r": "\r",
+            "b": "\b",
+            "f": "\f",
+        }
+        if ch not in escapes:
+            # TODO: implement numeric (octal / hex) escape sequences
+            logger.warning("Unknown escape sequence ignored: %s", repr("\\" + ch))
+            return ""
+        return escapes[ch]
+
+    def number(self):
+        """
+        Process anything that starts with a number (or a minus sign). Could be:
+        - An actual number in hex, octal, binary or decimal
+        - Numeric labels or numeric label references
+        - floating point (not implemented yet)
+
+        Raises ValueError if no valid digits are found.
+        """
+        reader = self.reader
+        numbstr = reader.char
+        reader.advance()
+
+        logger.info("Found number: %s", numbstr)
+
+        negative = numbstr == "-"
+        if negative:
+            if reader.char not in decChars:
+                raise ValueError("Error: expected a digit after `-`, found "
+                                 + repr(reader.char))
+            numbstr += reader.char
+            reader.advance()
+
+        # Hex, Bin, Oct and the 0
+        if numbstr[-1] == "0":
+            # advance() already loaded the character that follows the zero,
+            # so the prefix is the current character, not the peeked one.
+            token = None
+            if reader.char == "x":
+                reader.advance()
+                token = self.hex()
+            elif reader.char == "b" and reader.peek() in binChars:
+                # A `0b` not followed by a binary digit is a reference to
+                # the numeric label 0 and is handled below.
+                reader.advance()
+                token = self.bin()
+            elif reader.char in decChars:
+                token = self.oct()
+            if token is not None:
+                return (token[0], -token[1] if negative else token[1])
+
+        # TODO: floating point numbers
+
+        # Decimal number
+        # consume a decimal number
+        while reader.char in decChars:
+            numbstr += reader.char
+            reader.advance()
+
+        # Numeric label reference
+        if reader.char in ("b", "f"):
+            numbstr += reader.char
+            reader.advance()
+            return (TokenType.identifier, numbstr)
+            # TODO: Return identifier or label?
+            # It's a label-ref not a label!
+
+        # Numeric label
+        # Compared with `==`: `"" in ":"` is true, which turned a number at
+        # the very end of the input into a label.
+        if reader.char == ":":
+            reader.advance()
+            return (TokenType.label, numbstr)
+
+        return (TokenType.integer, int(numbstr))
+
+    def _radix_integer(self, allowed, base, name):
+        """
+        Consume the longest run of `allowed` characters and return it as an
+        integer token in the given base. Raises ValueError if the run is
+        empty.
+        """
+        digits = []
+        while self.reader.char in allowed:
+            digits.append(self.reader.char)
+            self.reader.advance()
+        if not digits:
+            raise ValueError("Error: expected %s digits, found %s"
+                             % (name, repr(self.reader.char)))
+        return (TokenType.integer, int("".join(digits), base))
+
+    def hex(self):
+        """Lex the digits of a hexadecimal number (the `0x` is already consumed)."""
+        return self._radix_integer(hexChars, 16, "hexadecimal")
+
+    def oct(self):
+        """Lex the digits of an octal number (the leading `0` is already consumed)."""
+        token = self._radix_integer(octChars, 8, "octal")
+        if self.reader.char in decChars:
+            # An 8 or a 9 would otherwise be split into a second number
+            raise ValueError("Error: invalid digit in octal number: "
+                             + self.reader.char)
+        return token
+
+    def bin(self):
+        """Lex the digits of a binary number (the `0b` is already consumed)."""
+        return self._radix_integer(binChars, 2, "binary")
+
+    def identifier(self):
+        """
+        Lex a run of alphanumeric characters and underscores and return it as
+        `(TokenType.identifier, name)`.
+        """
+        chars = []
+        while self.reader.char.isalnum() or self.reader.char == "_":
+            chars.append(self.reader.char)
+            self.reader.advance()
+        return (TokenType.identifier, "".join(chars))
+
+if __name__ == "__main__":
+    # Debugging aid: print every token of the file given as first argument
+    import sys
+    with Reader(sys.argv[1]) as src:
+        for token in Lexer(src):
+            print(token)
diff --git a/pysc-v/Frontend/logger.py b/pysc-v/Frontend/logger.py
new file mode 100644
index 0000000..a0f76e1
--- /dev/null
+++ b/pysc-v/Frontend/logger.py
@@ -0,0 +1,10 @@
+import logging
+import sys
+
+logging.basicConfig()
+
+
+def newlogger(name):
+    """Return the logger registered as `name`, with its level set to DEBUG."""
+    log = logging.getLogger(name)
+    log.setLevel(logging.DEBUG)
+    return log
diff --git a/pysc-v/Frontend/parser.py b/pysc-v/Frontend/parser.py
new file mode 100644
index 0000000..b5acfbb
--- /dev/null
+++ b/pysc-v/Frontend/parser.py
@@ -0,0 +1 @@
+from InstructionSets import RV32I, RV32C, RV32F, RV32D
diff --git a/pysc-v/Frontend/reader.py b/pysc-v/Frontend/reader.py
index 45d155c..7694a94 100644
--- a/pysc-v/Frontend/reader.py
+++ b/pysc-v/Frontend/reader.py
@@ -1,24 +1,40 @@
+# TODO Logging is interesting for debugging purposes, decide what to do with it
+from logger import newlogger
+logger = newlogger(__name__)
+#import logging
+#logger.setLevel(logging.WARN)
+
class Reader:
def __init__(self, filename):
+ # Only records the file name, the file itself is opened by open()
self.filename = filename
self._file = None
self.lineno = 0
self.charno = 0
+ # Current character: None until advance() is called for the first time
+ self.char = None
- def __enter__(self):
+ def open(self):
+ # Open self.filename for reading in text mode and keep the file object
self._file = open(self.filename, "r")
+ return
+ def close(self):
+ # Close the underlying file; it must have been opened before
+ self._file.close()
+ return
+
+ def __enter__(self):
+ # Context manager entry: open the file and hand back the reader itself
+ self.open()
return self
def __exit__(self, type, value, tb):
+ # Context manager exit: close the file, exceptions are not suppressed
- self._file.close()
+ self.close()
- def char(self):
- ch = self._file.read(1)
+ def advance(self):
+ # Load the next character of the file into self.char ("" at the end of
+ # the file). The line / column counters are updated according to the
+ # character that was current before the call.
self.charno += 1
- if ch == "\n":
+ if self.char == "\n":
self.lineno += 1
self.charno = 0
- return ch
+ self.char = self._file.read(1)
+
+ # The value is passed as a logging argument so the message is only
+ # formatted when debug output is enabled
+ logger.debug("Read: %s", repr(self.char))
+ return
def peek(self):
pos = self._file.tell()
@@ -28,7 +44,8 @@ class Reader:
if __name__ == "__main__":
+ # Self test: dump this very file character by character
- with Reader("reader.py") as src:
+ with Reader(__file__ ) as src:
print(src.peek())
while src.peek() != "":
- print(src.char())
+ src.advance()
+ print(src.char)