From 41a2c1fdaa07b3b51bf6073bf6fbb0156b085fa5 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Tue, 23 Jun 2009 03:21:32 +0200 Subject: added parser - commited parser (Michael Popov) - changed encoding of parser.py to utf8 --- src/front/parser.py | 252 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 196 insertions(+), 56 deletions(-) (limited to 'src/front/parser.py') diff --git a/src/front/parser.py b/src/front/parser.py index 43606a2..9e738d3 100644 --- a/src/front/parser.py +++ b/src/front/parser.py @@ -1,93 +1,233 @@ -class Parser: - lexer = None +# -*- coding: utf-8 -*- +from ast import * +from lexer import * +from token import * - def __init__(self, lexer): +class Parser: + def __init__(self, lex): + self.lexer = lex + self.token = lex.scan() + self.count = 1 #debug return def parse(self): return self.program() def move(self): + print self.count, self.token #debug + self.count += 1 #debug + self.token = self.lexer.scan() return def error(self, msg): - return - - def match(tag): - return - - # bool = bool "||" join | join. - def bool(self): - return - - # join = join "&&" equality | equality. + raise Exception(msg) + + def match(self, tag): + if self.token.tag != tag: + self.error("match: expected %s got %s\n" %(tag, self.token.tag)) + val = self.token.value + self.move() + return val + + # boolean = join { "||" join }. + def boolean(self): + res = self.join() + while self.token.tag == Tag.OPERATOR and self.token.value == "||": + self.move() + res = BinaryExpression(res, "||", self.join()) + return res + + # join = equality { "&&" equality }. def join(self): - return + res = self.equality() + while self.token.tag == Tag.OPERATOR and self.token.value == "&&": + self.move() + res = BinaryExpression(res, "&&", self.relation()) + return res - # equality = equality "==" relation | equality "!=" relation | relation. + # equality = relation { ( "==" | "!=" ) relation }. def equality(self): - return + res = self.relation() + while self.token.tag == Tag.OPERATOR and self.token.value in ["==", "!="]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.relation()) + return res - # relation = expression "<" expression | expression "<=" expression | expression ">=" expression | expression ">" expression | expression. + # relation = expression { ( "<" | "<=" | ">=" | ">" ) expression }. def relation(self): - return + res = self.expression() + while self.token.tag == Tag.OPERATOR and self.token.value in ["<", "<=", ">=", ">"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.expression()) + return res - # expression = expression "+" term | expression "-" term | term. + # expression = term { ( "+" | "-" ) term }. def expression(self): - return + res = self.term() + while self.token.tag == Tag.OPERATOR and self.token.value in ["+", "-"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.term()) + return res - # term = term "*" unary | term "/" unary | unary. + # term = unary { ( "*" | "/" | "%" ) unary }. def term(self): - return + res = self.unary() + while self.token.tag == Tag.OPERATOR and self.token.value in ["*", "/", "%"]: + op = self.match(Tag.OPERATOR) + res = BinaryExpression(res, op, self.unary()) + return res # unary = "!" unary | "-" unary | factor. def unary(self): - return - - # factor = "(" bool ")" | ident | integer | "true" | "false". + if self.token.tag == Tag.OPERATOR and self.token.value == "!": + self.move() + return UnaryExpression("!", self.unary()) + if self.token.tag == Tag.OPERATOR and self.token.value == "-": + self.move() + return UnaryExpression("-", self.unary()) + return self.factor() + + # factor = "(" boolean ")" | integer | "true" | "false" | ident | function_call. def factor(self): - return + if self.token.tag == Tag.LPAREN: + self.move() + res = self.boolean() + self.match(Tag.RPAREN) + return res + if self.token.tag == Tag.NUMBER: + return Constant(self.match(Tag.NUMBER)) + if self.token.tag == Tag.TRUE: + self.move() + return Constant(1) + if self.token.tag == Tag.FALSE: + self.move() + return Constant(0) + ''' Wegen der Änderung in function_call() kann die Grammatik nicht + mehr mit einem Lookahead von 1 geparst werden. Die Lösung hier + ist, den Aufruf von function_call() zu "inlinen". Muss irgendwann + aber geändert werden. Eine Lösung wäre Funktionen mit call + aufzurufen oder eben auf LL(2) oder LR umzusteigen. + ''' + if self.token.tag == Tag.IDENT: + name = self.match(Tag.IDENT) + if self.token.tag == Tag.LBRAK: # function_call + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) + return Variable(name)# variable + + error("unknown factor begins with " + self.token.tag) + + # function_call = ident "[" [ expression_list ] "]". + ''' function_call muss eine expression sein sonst geht alles folgende nicht: + foo = bar[] + @foo[bar] + spam[5] + eggs[1] + andererseits ist function_call dann kein statement mehr + exit[] + wäre also falsch. Der Aufruf muss in einem assignment verkapselt werden: + dummy = exit[] + ''' + def function_call(self): + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.expression_list() + self.match(Tag.RBRAK) + return FunctionCall(name, args) # ident_list = ident { "," ident }. def ident_list(self): - return + ident = [self.match(Tag.IDENT)] + while self.token.tag == Tag.COMMA: + self.move() + ident.append(self.match(Tag.IDENT)) + return ident - # expression_list = expression { "," expression }. + # expression_list = boolean { "," boolean }. def expression_list(self): - return - - # program = function { function }. - def program(self): - return - - # function = "fun" ident "[" [ ident_list ] "]" statements "end". + exp = [self.boolean()] + while self.token.tag == Tag.COMMA: + self.move() + exp.append(self.boolean()) + return exp + + # function_list = function { function }. + def function_list(self): + funcs = [self.function()] + while self.token != None: + funcs.append(self.function()) + return funcs + + # statement_list = statement { statement }. + def statement_list(self): + stat = [self.statement()] + while self.token.tag not in [Tag.END, Tag.ELSE]: + stat.append(self.statement()) + return stat + + # function = "fun" ident "[" [ ident_list ] "]" nl statement_list "end" nl. def function(self): - return - - # statements = statement { nl statement }. - def statements(self): - return - - # statement = [ if_statement | while_statement | assignment | function_call | return_statement ]. + self.match(Tag.FUN) + name = self.match(Tag.IDENT) + self.match(Tag.LBRAK) + args = [] if self.token.tag == Tag.RBRAK else self.ident_list() + self.match(Tag.RBRAK) + self.match(Tag.NEWLINE) + block = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return Function(name, args, block) + + # statement = [ if_statement | while_statement | assign_statement | return_statement ]. def statement(self): - return + return {Tag.IF: self.if_statement, + Tag.WHILE: self.while_statement, + Tag.IDENT: self.assignment, + Tag.RETURN: self.return_statement + }[self.token.tag]() - # if_statement = "if" expression statements [ "else" statements ] "end". + # if_statement = "if" boolean nl statement_list [ "else" [ nl ] statement_list ] "end" nl. def if_statement(self): - return - - # while_statement = "while" expression statements "end". + self.match(Tag.IF) + condition = self.boolean() + self.match(Tag.NEWLINE) + true_case = self.statement_list() + else_case = [] + if self.token.tag == Tag.ELSE: + self.move() + if self.token.tag == Tag.NEWLINE: self.move() + else_case = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return IfStatement(condition, true_case, else_case) + + # while_statement = "while" boolean nl statement_list "end" nl. def while_statement(self): - return + self.match(Tag.WHILE) + condition = self.boolean() + self.match(Tag.NEWLINE) + body = self.statement_list() + self.match(Tag.END) + self.match(Tag.NEWLINE) + return WhileStatement(condition, body) + + # return_statement = "@" boolean nl. + def return_statement(self): + self.match(Tag.RETURN) + exp = self.boolean() + self.match(Tag.NEWLINE) + return ReturnStatement(exp) - # assignment = ident "=" expression. + # assignment = ident "=" boolean nl. def assignment(self): - return + name = self.match(Tag.IDENT) + self.match(Tag.ASSIGNMENT) + exp = self.boolean() + self.match(Tag.NEWLINE) + return AssignStatement(name, exp) - # function_call = ident "[" [ expression_list ] "]". - def function_call(self): - return + # program = function_list. + def program(self): + return Program(self.function_list()) - # return_statement = "@" expression. - def return_statement(self): - return -- cgit v1.2.3 From a4f2a248273064ec464e7d376f8f5778ac808bd6 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Sun, 28 Jun 2009 15:10:57 +0200 Subject: fixed error with not newline at file end moved __init__.py with testcode from src/front to src/ added in lexer "\n\n" at source end to have a newline at file end added unexpected end of file message, if matched token is None --- src/front/parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/front/parser.py') diff --git a/src/front/parser.py b/src/front/parser.py index 9e738d3..29e300d 100644 --- a/src/front/parser.py +++ b/src/front/parser.py @@ -23,6 +23,9 @@ class Parser: raise Exception(msg) def match(self, tag): + if self.token == None: + self.error("Unexpected end of file.") + if self.token.tag != tag: self.error("match: expected %s got %s\n" %(tag, self.token.tag)) val = self.token.value @@ -155,7 +158,7 @@ class Parser: # function_list = function { function }. def function_list(self): funcs = [self.function()] - while self.token != None: + while self.token: funcs.append(self.function()) return funcs -- cgit v1.2.3