• R/O
  • HTTP
  • SSH
  • HTTPS

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

A fast implementation of the Nix expression language


File Info

Rev. 376831c93fc2872a6134b52fd5bec158805e195f
大小 17,891 字节
时间 2024-06-10 07:57:27
作者 Corbin
Log Message

parser: Add weird let-expression and rec-expression.

Content

import rply
from rply.token import BaseBox

import heap

# A basic Nix parser ported from CppNix.
# LoC target:
# 456 (parser.y) + 293 (lexer.l) = 749

lg = rply.LexerGenerator()

# Lexer states for quasiliterals.
class LexerState(object): pass
class _StateExpr(LexerState): pass
STATE_EXPR = _StateExpr()
class _StateString(LexerState): pass
STATE_STRING = _StateString()

STRING_CHAR = "([^\$\"\\\\]|\$[^\{\"\\\\])"

lg.add("STRING", "\"%s*\"" % STRING_CHAR)
lg.add("STRING_INIT", "\"%s*\$\{" % STRING_CHAR, push=[STATE_STRING])
lg.add("STRING_PIECE", "\}%s*\$\{" % STRING_CHAR, state=STATE_STRING)
lg.add("STRING_END", "\}%s*\"" % STRING_CHAR, state=STATE_STRING, pop=True)

KEYWORDS = "IF THEN ELSE ASSERT WITH LET REC INHERIT OR IN".split()
for kw in KEYWORDS: lg.add(kw, kw.lower())

lg.add("ELLIPSIS", "\.\.\.")
lg.add("EQ", "\=\=")
lg.add("NEQ", "\!\=")
lg.add("LEQ", "\<\=")
lg.add("LE", "\<")
lg.add("GEQ", "\>\=")
lg.add("GE", "\>")
lg.add("AND", "\&\&")
lg.add("OR_OP", "\|\|")
lg.add("IMPL", "\-\>")
lg.add("UPDATE", "\/\/")
lg.add("CONCAT", "\+\+")
lg.add("PLUS", "\+")
lg.add("MINUS", "-")
lg.add("MUL", "\*")
lg.add("DIV", "\/")
lg.add("NOT", "!")
lg.add("HAS", "\?")

lg.add("COLON", ":")
lg.add("SEMI", ";")
lg.add("OPEN_BRACE", "\{", push=[STATE_EXPR])
lg.add("CLOSE_BRACE", "\}", pop=True)
lg.add("OPEN_BRACK", "\[")
lg.add("CLOSE_BRACK", "\]")
lg.add("OPEN_PAREN", "\(")
lg.add("CLOSE_PAREN", "\)")
lg.add("DOT", "\.")
lg.add("COMMA", ",")
lg.add("AT", "@")
lg.add("EQUALS", "=")

PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]"
lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+")
lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*")
lg.add("INT", "[0-9]+")
lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?")
lg.add("PATH_CHAR", PATH_CHAR)
lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR))
lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR))
lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR))
lg.add("HPATH_START", "\~\/")
lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR))

lg.ignore("[ \t\r\n]+")
lg.ignore("#[^\r\n]*")
lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/")

lexer = lg.build()

class NotAnExpr(Exception):
    def __init__(self, cls): self.cls = cls

class MissingVar(Exception):
    def __init__(self, name): self.name = name

class RegiuxBox(BaseBox):
    "A box for any syntax."
    cls = "unknown"
    def compile(self, scope): raise NotAnExpr(self.cls)

class EBox(RegiuxBox):
    "A box for expressions."
    cls = "expr"
    def compile(self, scope):
        print "Implementation error: Expression class", self.__class__, "can't be compiled (yet)"
        assert False

class IntBox(EBox):
    def __init__(self, value): self.value = value
    def pretty(self): return str(self.value)
    def compile(self, scope): return heap.HeapInt(self.value)

class StrBox(EBox):
    def __init__(self, value): self.value = value
    def pretty(self): return '"%s"' % self.value
    def compile(self, scope): return heap.HeapStr(self.value)

class IfBox(EBox):
    def __init__(self, cond, seq, alt):
        self.cond = cond
        self.seq = seq
        self.alt = alt
    def pretty(self):
        return "if %s then %s else %s" % (
                self.cond.pretty(), self.seq.pretty(), self.alt.pretty())
    def compile(self, scope):
        return heap.HeapCond(self.cond.compile(scope), self.seq.compile(scope),
                             self.alt.compile(scope))

class BindsBox(EBox):
    def __init__(self, binds): self.binds = binds
    def pretty(self):
        binds = [bind.pretty() for bind in self.binds]
        return "{ %s }" % " ".join(binds)
    def getbinds(self): return self.binds
    def compile(self, scope):
        attrs = {}
        for bind in self.binds:
            for name in bind.getnames():
                if name not in scope: raise MissingVar(name)
                attrs[name] = scope[name]
        return heap.HeapAttrSet(attrs)

class ListBox(EBox):
    def __init__(self, exprs): self.exprs = exprs
    def pretty(self):
        return "[ %s ]" % " ".join([expr.pretty() for expr in self.exprs])
    def getexprs(self): return self.exprs
    def compile(self, scope):
        return heap.HeapList([expr.compile(scope) for expr in self.exprs])

# XXX bad encapsulation
def attr2str(box):
    assert isinstance(box, StrBox)
    return box.value

class SelectBox(EBox):
    def __init__(self, expr, path, alt):
        self.expr = expr
        self.path = path
        self.alt = alt
    def pretty(self):
        if self.alt is None:
            return "%s.%s" % (self.expr.pretty(), self.path.pretty())
        else:
            return "%s.%s or %s" % (self.expr.pretty(), self.path.pretty(), self.alt.pretty())
    def compile(self, scope):
        assert self.alt is None, "can't compile yet"
        return heap.HeapSelect(self.expr.compile(scope),
                               [attr2str(attr) for attr in self.path.getattrs()])

class VarBox(EBox):
    def __init__(self, name): self.name = name
    def pretty(self): return self.name
    def compile(self, scope):
        if self.name in scope: return scope[self.name]
        raise MissingVar(self.name)

class AppBox(EBox):
    def __init__(self, func, arg):
        self.func = func
        self.arg = arg
    def pretty(self): return "(%s) (%s)" % (self.func.pretty(), self.arg.pretty())
    def compile(self, scope):
        return heap.HeapApp(self.func.compile(scope), self.arg.compile(scope))

class StrQLBox(EBox):
    def __init__(self, init, pairs):
        self.init = init
        self.pairs = pairs
    def pretty(self):
        rv = [self.init]
        for expr, piece in self.pairs:
            rv.append("${ ")
            rv.append(expr.pretty())
            rv.append(" }")
            rv.append(piece)
        return '"%s"' % "".join(rv)
    def compile(self, scope):
        l = [heap.HeapStr(self.init)]
        for expr, piece in self.pairs:
            l.append(expr.compile(scope))
            l.append(heap.HeapStr(piece))
        return heap.HeapApp(
                heap.HeapApp(heap.HeapSelect(scope["builtins"], ["concatStringsSep"]),
                             heap.HeapStr("")),
                heap.HeapList(l))

class BindBox(RegiuxBox):
    "A box for attrset attributes."
    cls = "attrs"

class BindExprBox(BindBox):
    def __init__(self, path, expr):
        self.path = path
        self.expr = expr
    def pretty(self): return "%s = %s;" % (self.path.pretty(), self.expr.pretty())
    def getnames(self):
        return [attr2str(self.path.getattrs()[0])]

class BindInheritBox(BindBox):
    def __init__(self, attrs, scope):
        self.attrs = attrs
        self.scope = scope
    def pretty(self):
        if self.scope:
            return "inherit (%s) %s;" % (self.scope.pretty(), self.attrs.pretty())
        else: return "inherit %s;" % self.attrs.pretty()
    def getnames(self): return [attr2str(attr) for attr in self.attrs.getattrs()]

# XXX below boxes need cls and compile

class FormalBox(BaseBox):
    def __init__(self, name, default):
        self.name = name
        self.default = default
    def pretty(self):
        if self.default is None:
            return self.name
        else:
            return "%s ? %s" % (self.name, self.default.pretty())

class FormalsBox(BaseBox):
    def __init__(self, params, hasEllipsis):
        self.params = params
        self.hasEllipsis = hasEllipsis
    def pretty(self):
        params = [param.pretty() for param in self.params]
        if self.hasEllipsis: params.append("...")
        return "{ %s }" % ", ".join(params)
    def getparams(self): return self.params
    def getellipsis(self): return self.hasEllipsis

class AttrPathBox(BaseBox):
    def __init__(self, attrs): self.attrs = attrs
    def pretty(self): return ".".join([attr.pretty() for attr in self.attrs])
    def getattrs(self): return self.attrs

class AttrsBox(BaseBox):
    def __init__(self, attrs): self.attrs = attrs
    def pretty(self): return " ".join([attr.pretty() for attr in self.attrs])
    def getattrs(self): return self.attrs

class ExprUnaryBox(BaseBox):
    def __init__(self, expr, op):
        self.expr = expr
        self.op = op.getstr()
    def pretty(self):
        return "(%s%s)" % (self.op, self.expr.pretty())

class ExprBinaryBox(BaseBox):
    def __init__(self, left, right, verb):
        self.left = left
        self.right = right
        self.verb = verb
    def pretty(self):
        return "(%s %s %s)" % (self.verb, self.left.pretty(), self.right.pretty())
    def compile(self, scope):
        return heap.HeapApp(
                heap.HeapApp(heap.HeapSelect(scope["builtins"], [self.verb]),
                             self.left.compile(scope)),
                self.right.compile(scope))

class HasBox(BaseBox):
    def __init__(self, value, path):
        self.value = value
        self.path = path
    def pretty(self):
        return "(%s ? %s)" % (self.value.pretty(), self.path.pretty())

class AssertBox(BaseBox):
    def __init__(self, cond, expr):
        self.cond = cond
        self.expr = expr
    def pretty(self): return "assert %s; %s" % (self.cond.pretty(), self.expr.pretty())

class LetBox(BaseBox):
    def __init__(self, binds, expr):
        self.binds = binds
        self.expr = expr
    def pretty(self): return "let %s in %s" % (self.binds.pretty(), self.expr.pretty())

class WithBox(BaseBox):
    def __init__(self, scope, expr):
        self.scope = scope
        self.expr = expr
    def pretty(self): return "with %s; %s" % (self.scope.pretty(), self.expr.pretty())

class WeirdLetBox(BaseBox):
    def __init__(self, binds): self.binds = binds
    def pretty(self): return "let " + self.binds.pretty()

class RecBox(BaseBox):
    def __init__(self, binds): self.binds = binds
    def pretty(self): return "rec " + self.binds.pretty()

class LambdaBox(BaseBox):
    def __init__(self, binding, params, body):
        self.binding = binding
        self.params = params
        self.body = body
    def pretty(self):
        body = self.body.pretty()
        if self.binding and self.params:
            return "%s@%s: %s" % (self.binding.pretty(), self.params.pretty(), body)
        elif self.binding: return "%s: %s" % (self.binding.pretty(), body)
        elif self.params: return "%s: %s" % (self.params.pretty(), body)
        else: return "_: " + body

class IncompleteQL(BaseBox):
    def __init__(self, pairs): self.pairs = pairs

pg = rply.ParserGenerator(KEYWORDS + [
    "ID", "INT", "SPATH", "STRING", "URI",
    "STRING_INIT", "STRING_PIECE",  "STRING_END",
    "AND", "IMPL", "OR_OP",
    "EQ", "NEQ", "LE", "GE", "LEQ", "GEQ", "HAS",
    "CONCAT", "UPDATE",
    "DIV", "MINUS", "MUL", "PLUS",
    "NEGATE", "NOT",
    "COLON", "SEMI", "DOT", "COMMA", "AT", "EQUALS", "ELLIPSIS",
    "OPEN_BRACE", "CLOSE_BRACE", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN",
], precedence=[
    ("right", ["IMPL"]),
    ("left", ["OR_OP"]),
    ("left", ["AND"]),
    ("nonassoc", ["EQ", "NEQ"]),
    ("nonassoc", ["LE", "GE", "LEQ", "GEQ"]),
    ("right", ["UPDATE"]),
    ("left", ["NOT"]),
    ("left", ["PLUS", "MINUS"]),
    ("left", ["MUL", "DIV"]),
    ("right", ["CONCAT"]),
    ("nonassoc", ["HAS"]),
    ("nonassoc", ["NEGATE"]),
])

def trimBox(b, start, stop):
    s = b.getstr()
    if stop < 0: stop += len(s)
    assert stop >= 0, "Invariant from lexer regex"
    return s[start:stop]

class ParseError(Exception):
    def __init__(self, token): self.token = token

@pg.error
def parseError(token): raise ParseError(token)

def constRule(rule, pb): pg.production(rule)(lambda _: pb)
def enclosedRule(rule, i): pg.production(rule)(lambda p: p[i])
def precRule(sup, sub): enclosedRule("expr%s : expr%s" % (sup, sub), 0)
SPINE = "", "_function", "_if", "_op", "_app", "_select", "_simple"
for sup, sub in zip(SPINE, SPINE[1:]): precRule(sup, sub)

@pg.production("expr_function : ID COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), None, p[2])

@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
def exprLambda(p): return LambdaBox(None, p[1], p[4])

@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE AT ID COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[4].getstr()), p[1], p[6])

@pg.production("expr_function : ID AT OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[3], p[6])

@pg.production("expr_function : ASSERT expr SEMI expr_function")
def exprAssert(p): return AssertBox(p[1], p[3])

@pg.production("expr_function : LET binds IN expr_function")
def exprLet(p): return LetBox(p[1], p[3])

@pg.production("expr_function : WITH expr SEMI expr_function")
def exprWith(p): return WithBox(p[1], p[3])

@pg.production("expr_if : IF expr THEN expr ELSE expr")
def exprIf(p): return IfBox(p[1], p[3], p[5])

@pg.production("expr_op : NEGATE expr_op | NOT expr_op")
def exprUnary(p): return ExprUnaryBox(p[1], p[0])

opDict = {
    "*": "mul",
    "+": "add",
    "-": "sub",
}

@pg.production("expr_op : expr_op AND expr_op")
@pg.production("expr_op : expr_op CONCAT expr_op")
@pg.production("expr_op : expr_op DIV expr_op")
@pg.production("expr_op : expr_op EQ expr_op")
@pg.production("expr_op : expr_op GE expr_op")
@pg.production("expr_op : expr_op GEQ expr_op")
@pg.production("expr_op : expr_op IMPL expr_op")
@pg.production("expr_op : expr_op LE expr_op")
@pg.production("expr_op : expr_op LEQ expr_op")
@pg.production("expr_op : expr_op MINUS expr_op")
@pg.production("expr_op : expr_op MUL expr_op")
@pg.production("expr_op : expr_op NEQ expr_op")
@pg.production("expr_op : expr_op OR_OP expr_op")
@pg.production("expr_op : expr_op PLUS expr_op")
@pg.production("expr_op : expr_op UPDATE expr_op")
def exprBinary(p):
    return ExprBinaryBox(p[0], p[2], opDict[p[1].getstr()])

@pg.production("expr_op : expr_op HAS attrpath")
def exprHas(p): return HasBox(p[0], p[2])

@pg.production("expr_app : expr_app expr_select")
def exprApp(p): return AppBox(p[0], p[1])

@pg.production("expr_select : expr_simple DOT attrpath | expr_simple DOT attrpath OR expr_select")
def exprSelect(p): return SelectBox(p[0], p[2], p[4] if len(p) == 5 else None)

@pg.production("expr_simple : ID")
def exprSimpleId(p): return VarBox(p[0].getstr())

@pg.production("expr_simple : INT")
def exprSimpleInt(p): return IntBox(int(p[0].getstr()))

@pg.production("expr_simple : STRING")
def exprQuoted(p): return StrBox(trimBox(p[0], 1, -1))

@pg.production("expr_simple : STRING_INIT string_ql")
def stringQL(p):
    s = trimBox(p[0], 1, -2)
    incomplete = p[1]
    assert isinstance(incomplete, IncompleteQL)
    return StrQLBox(s, incomplete.pairs)

# XXX should be left-recursive? Requires refactoring all string_ql rules
@pg.production("string_ql : expr STRING_PIECE string_ql")
def stringQLPiece(p):
    incomplete = p[2]
    assert isinstance(incomplete, IncompleteQL)
    pairs = [(p[0], trimBox(p[1], 1, -2))] + incomplete.pairs
    return IncompleteQL(pairs)

@pg.production("string_ql : expr STRING_END")
def stringQLEnd(p): return IncompleteQL([(p[0], trimBox(p[1], 1, -1))])

@pg.production("expr_simple : URI")
def exprURI(p): return StrBox(p[0].getstr())

@pg.production("expr_simple : LET OPEN_BRACE binds CLOSE_BRACE")
def exprWeirdLet(p): return WeirdLetBox(p[2])

@pg.production("expr_simple : REC OPEN_BRACE binds CLOSE_BRACE")
def exprRec(p): return RecBox(p[2])

enclosedRule("expr_simple : OPEN_BRACE binds CLOSE_BRACE", 1)
enclosedRule("expr_simple : OPEN_PAREN expr CLOSE_PAREN", 1)
enclosedRule("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK", 1)
constRule("expr_list :", ListBox([]))

@pg.production("expr_list : expr_list expr_select")
def exprListCons(p): return ListBox(p[0].getexprs() + [p[1]])

@pg.production("binds : binds attrpath EQUALS expr SEMI")
def bindsExpr(p): return BindsBox(p[0].getbinds() + [BindExprBox(p[1], p[3])])

@pg.production("binds : binds INHERIT attrs SEMI")
def bindsInherit(p):
    return BindsBox(p[0].getbinds() + [BindInheritBox(p[2], None)])

@pg.production("binds : binds INHERIT OPEN_PAREN expr CLOSE_PAREN attrs SEMI")
def bindsInherit(p):
    return BindsBox(p[0].getbinds() + [BindInheritBox(p[5], p[3])])

constRule("binds :", BindsBox([]))

@pg.production("formals : formals COMMA formal")
def formalsComma(p):
    return FormalsBox(p[0].getparams() + [p[2]], p[0].getellipsis())

@pg.production("formals : formal")
def formalsFormal(p): return FormalsBox([p[0]], False)

# XXX causes a conflict with "binds :"
constRule("formals :", FormalsBox([], False))
constRule("formals : ELLIPSIS", FormalsBox([], True))

@pg.production("formal : ID | ID HAS expr")
def formalId(p): return FormalBox(p[0].getstr(), p[2] if len(p) == 3 else None)

@pg.production("attrs : attrs attr")
def attrsAttr(p): return AttrsBox(p[0].getattrs() + [p[1]])

constRule("attrs :", AttrsBox([]))

@pg.production("attrpath : attrpath DOT attr")
def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[2]])

@pg.production("attrpath : attr")
def attrpathCons(p): return AttrPathBox(p)

@pg.production("attr : ID")
def attrId(p): return StrBox(p[0].getstr())

parser = pg.build()
def printConflict(table, row):
    print "Conflict:", row
    st = row[0]
    print "Action:", table.lr_action[st], "Goto:", table.lr_goto[st], "Reduction:", table.default_reductions[st]
def checkTable(table):
    if table.sr_conflicts:
        print("Shift/reduce conflicts:")
        for row in table.sr_conflicts: printConflict(table, row)
    if table.rr_conflicts:
        print("Reduce/reduce conflicts:")
        for row in table.rr_conflicts: printConflict(table, row)
    # assert not table.sr_conflicts and not table.rr_conflicts
checkTable(parser.lr_table)

# Top-level interface: Lex and parse UTF-8 text.
def parse(expr): return parser.parse(lexer.lex(expr))