A fast implementation of the Nix expression language
Rev. | 376831c93fc2872a6134b52fd5bec158805e195f |
---|---|
大小 | 17,891 字节 |
时间 | 2024-06-10 07:57:27 |
作者 | Corbin |
Log Message | parser: Add weird let-expression and rec-expression.
|
import rply
from rply.token import BaseBox
import heap
# A basic Nix parser ported from CppNix.
# LoC target:
# 456 (parser.y) + 293 (lexer.l) = 749
lg = rply.LexerGenerator()
# Lexer states for quasiliterals.
class LexerState(object): pass
class _StateExpr(LexerState): pass
STATE_EXPR = _StateExpr()
class _StateString(LexerState): pass
STATE_STRING = _StateString()
STRING_CHAR = "([^\$\"\\\\]|\$[^\{\"\\\\])"
lg.add("STRING", "\"%s*\"" % STRING_CHAR)
lg.add("STRING_INIT", "\"%s*\$\{" % STRING_CHAR, push=[STATE_STRING])
lg.add("STRING_PIECE", "\}%s*\$\{" % STRING_CHAR, state=STATE_STRING)
lg.add("STRING_END", "\}%s*\"" % STRING_CHAR, state=STATE_STRING, pop=True)
KEYWORDS = "IF THEN ELSE ASSERT WITH LET REC INHERIT OR IN".split()
for kw in KEYWORDS: lg.add(kw, kw.lower())
lg.add("ELLIPSIS", "\.\.\.")
lg.add("EQ", "\=\=")
lg.add("NEQ", "\!\=")
lg.add("LEQ", "\<\=")
lg.add("LE", "\<")
lg.add("GEQ", "\>\=")
lg.add("GE", "\>")
lg.add("AND", "\&\&")
lg.add("OR_OP", "\|\|")
lg.add("IMPL", "\-\>")
lg.add("UPDATE", "\/\/")
lg.add("CONCAT", "\+\+")
lg.add("PLUS", "\+")
lg.add("MINUS", "-")
lg.add("MUL", "\*")
lg.add("DIV", "\/")
lg.add("NOT", "!")
lg.add("HAS", "\?")
lg.add("COLON", ":")
lg.add("SEMI", ";")
lg.add("OPEN_BRACE", "\{", push=[STATE_EXPR])
lg.add("CLOSE_BRACE", "\}", pop=True)
lg.add("OPEN_BRACK", "\[")
lg.add("CLOSE_BRACK", "\]")
lg.add("OPEN_PAREN", "\(")
lg.add("CLOSE_PAREN", "\)")
lg.add("DOT", "\.")
lg.add("COMMA", ",")
lg.add("AT", "@")
lg.add("EQUALS", "=")
PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]"
lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+")
lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*")
lg.add("INT", "[0-9]+")
lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?")
lg.add("PATH_CHAR", PATH_CHAR)
lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR))
lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR))
lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR))
lg.add("HPATH_START", "\~\/")
lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR))
lg.ignore("[ \t\r\n]+")
lg.ignore("#[^\r\n]*")
lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/")
lexer = lg.build()
class NotAnExpr(Exception):
def __init__(self, cls): self.cls = cls
class MissingVar(Exception):
def __init__(self, name): self.name = name
class RegiuxBox(BaseBox):
"A box for any syntax."
cls = "unknown"
def compile(self, scope): raise NotAnExpr(self.cls)
class EBox(RegiuxBox):
"A box for expressions."
cls = "expr"
def compile(self, scope):
print "Implementation error: Expression class", self.__class__, "can't be compiled (yet)"
assert False
class IntBox(EBox):
def __init__(self, value): self.value = value
def pretty(self): return str(self.value)
def compile(self, scope): return heap.HeapInt(self.value)
class StrBox(EBox):
def __init__(self, value): self.value = value
def pretty(self): return '"%s"' % self.value
def compile(self, scope): return heap.HeapStr(self.value)
class IfBox(EBox):
def __init__(self, cond, seq, alt):
self.cond = cond
self.seq = seq
self.alt = alt
def pretty(self):
return "if %s then %s else %s" % (
self.cond.pretty(), self.seq.pretty(), self.alt.pretty())
def compile(self, scope):
return heap.HeapCond(self.cond.compile(scope), self.seq.compile(scope),
self.alt.compile(scope))
class BindsBox(EBox):
def __init__(self, binds): self.binds = binds
def pretty(self):
binds = [bind.pretty() for bind in self.binds]
return "{ %s }" % " ".join(binds)
def getbinds(self): return self.binds
def compile(self, scope):
attrs = {}
for bind in self.binds:
for name in bind.getnames():
if name not in scope: raise MissingVar(name)
attrs[name] = scope[name]
return heap.HeapAttrSet(attrs)
class ListBox(EBox):
def __init__(self, exprs): self.exprs = exprs
def pretty(self):
return "[ %s ]" % " ".join([expr.pretty() for expr in self.exprs])
def getexprs(self): return self.exprs
def compile(self, scope):
return heap.HeapList([expr.compile(scope) for expr in self.exprs])
# XXX bad encapsulation
def attr2str(box):
assert isinstance(box, StrBox)
return box.value
class SelectBox(EBox):
def __init__(self, expr, path, alt):
self.expr = expr
self.path = path
self.alt = alt
def pretty(self):
if self.alt is None:
return "%s.%s" % (self.expr.pretty(), self.path.pretty())
else:
return "%s.%s or %s" % (self.expr.pretty(), self.path.pretty(), self.alt.pretty())
def compile(self, scope):
assert self.alt is None, "can't compile yet"
return heap.HeapSelect(self.expr.compile(scope),
[attr2str(attr) for attr in self.path.getattrs()])
class VarBox(EBox):
def __init__(self, name): self.name = name
def pretty(self): return self.name
def compile(self, scope):
if self.name in scope: return scope[self.name]
raise MissingVar(self.name)
class AppBox(EBox):
def __init__(self, func, arg):
self.func = func
self.arg = arg
def pretty(self): return "(%s) (%s)" % (self.func.pretty(), self.arg.pretty())
def compile(self, scope):
return heap.HeapApp(self.func.compile(scope), self.arg.compile(scope))
class StrQLBox(EBox):
def __init__(self, init, pairs):
self.init = init
self.pairs = pairs
def pretty(self):
rv = [self.init]
for expr, piece in self.pairs:
rv.append("${ ")
rv.append(expr.pretty())
rv.append(" }")
rv.append(piece)
return '"%s"' % "".join(rv)
def compile(self, scope):
l = [heap.HeapStr(self.init)]
for expr, piece in self.pairs:
l.append(expr.compile(scope))
l.append(heap.HeapStr(piece))
return heap.HeapApp(
heap.HeapApp(heap.HeapSelect(scope["builtins"], ["concatStringsSep"]),
heap.HeapStr("")),
heap.HeapList(l))
class BindBox(RegiuxBox):
"A box for attrset attributes."
cls = "attrs"
class BindExprBox(BindBox):
def __init__(self, path, expr):
self.path = path
self.expr = expr
def pretty(self): return "%s = %s;" % (self.path.pretty(), self.expr.pretty())
def getnames(self):
return [attr2str(self.path.getattrs()[0])]
class BindInheritBox(BindBox):
def __init__(self, attrs, scope):
self.attrs = attrs
self.scope = scope
def pretty(self):
if self.scope:
return "inherit (%s) %s;" % (self.scope.pretty(), self.attrs.pretty())
else: return "inherit %s;" % self.attrs.pretty()
def getnames(self): return [attr2str(attr) for attr in self.attrs.getattrs()]
# XXX below boxes need cls and compile
class FormalBox(BaseBox):
def __init__(self, name, default):
self.name = name
self.default = default
def pretty(self):
if self.default is None:
return self.name
else:
return "%s ? %s" % (self.name, self.default.pretty())
class FormalsBox(BaseBox):
def __init__(self, params, hasEllipsis):
self.params = params
self.hasEllipsis = hasEllipsis
def pretty(self):
params = [param.pretty() for param in self.params]
if self.hasEllipsis: params.append("...")
return "{ %s }" % ", ".join(params)
def getparams(self): return self.params
def getellipsis(self): return self.hasEllipsis
class AttrPathBox(BaseBox):
def __init__(self, attrs): self.attrs = attrs
def pretty(self): return ".".join([attr.pretty() for attr in self.attrs])
def getattrs(self): return self.attrs
class AttrsBox(BaseBox):
def __init__(self, attrs): self.attrs = attrs
def pretty(self): return " ".join([attr.pretty() for attr in self.attrs])
def getattrs(self): return self.attrs
class ExprUnaryBox(BaseBox):
def __init__(self, expr, op):
self.expr = expr
self.op = op.getstr()
def pretty(self):
return "(%s%s)" % (self.op, self.expr.pretty())
class ExprBinaryBox(BaseBox):
def __init__(self, left, right, verb):
self.left = left
self.right = right
self.verb = verb
def pretty(self):
return "(%s %s %s)" % (self.verb, self.left.pretty(), self.right.pretty())
def compile(self, scope):
return heap.HeapApp(
heap.HeapApp(heap.HeapSelect(scope["builtins"], [self.verb]),
self.left.compile(scope)),
self.right.compile(scope))
class HasBox(BaseBox):
def __init__(self, value, path):
self.value = value
self.path = path
def pretty(self):
return "(%s ? %s)" % (self.value.pretty(), self.path.pretty())
class AssertBox(BaseBox):
def __init__(self, cond, expr):
self.cond = cond
self.expr = expr
def pretty(self): return "assert %s; %s" % (self.cond.pretty(), self.expr.pretty())
class LetBox(BaseBox):
def __init__(self, binds, expr):
self.binds = binds
self.expr = expr
def pretty(self): return "let %s in %s" % (self.binds.pretty(), self.expr.pretty())
class WithBox(BaseBox):
def __init__(self, scope, expr):
self.scope = scope
self.expr = expr
def pretty(self): return "with %s; %s" % (self.scope.pretty(), self.expr.pretty())
class WeirdLetBox(BaseBox):
def __init__(self, binds): self.binds = binds
def pretty(self): return "let " + self.binds.pretty()
class RecBox(BaseBox):
def __init__(self, binds): self.binds = binds
def pretty(self): return "rec " + self.binds.pretty()
class LambdaBox(BaseBox):
def __init__(self, binding, params, body):
self.binding = binding
self.params = params
self.body = body
def pretty(self):
body = self.body.pretty()
if self.binding and self.params:
return "%s@%s: %s" % (self.binding.pretty(), self.params.pretty(), body)
elif self.binding: return "%s: %s" % (self.binding.pretty(), body)
elif self.params: return "%s: %s" % (self.params.pretty(), body)
else: return "_: " + body
class IncompleteQL(BaseBox):
def __init__(self, pairs): self.pairs = pairs
pg = rply.ParserGenerator(KEYWORDS + [
"ID", "INT", "SPATH", "STRING", "URI",
"STRING_INIT", "STRING_PIECE", "STRING_END",
"AND", "IMPL", "OR_OP",
"EQ", "NEQ", "LE", "GE", "LEQ", "GEQ", "HAS",
"CONCAT", "UPDATE",
"DIV", "MINUS", "MUL", "PLUS",
"NEGATE", "NOT",
"COLON", "SEMI", "DOT", "COMMA", "AT", "EQUALS", "ELLIPSIS",
"OPEN_BRACE", "CLOSE_BRACE", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN",
], precedence=[
("right", ["IMPL"]),
("left", ["OR_OP"]),
("left", ["AND"]),
("nonassoc", ["EQ", "NEQ"]),
("nonassoc", ["LE", "GE", "LEQ", "GEQ"]),
("right", ["UPDATE"]),
("left", ["NOT"]),
("left", ["PLUS", "MINUS"]),
("left", ["MUL", "DIV"]),
("right", ["CONCAT"]),
("nonassoc", ["HAS"]),
("nonassoc", ["NEGATE"]),
])
def trimBox(b, start, stop):
s = b.getstr()
if stop < 0: stop += len(s)
assert stop >= 0, "Invariant from lexer regex"
return s[start:stop]
class ParseError(Exception):
def __init__(self, token): self.token = token
@pg.error
def parseError(token): raise ParseError(token)
def constRule(rule, pb): pg.production(rule)(lambda _: pb)
def enclosedRule(rule, i): pg.production(rule)(lambda p: p[i])
def precRule(sup, sub): enclosedRule("expr%s : expr%s" % (sup, sub), 0)
SPINE = "", "_function", "_if", "_op", "_app", "_select", "_simple"
for sup, sub in zip(SPINE, SPINE[1:]): precRule(sup, sub)
@pg.production("expr_function : ID COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), None, p[2])
@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
def exprLambda(p): return LambdaBox(None, p[1], p[4])
@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE AT ID COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[4].getstr()), p[1], p[6])
@pg.production("expr_function : ID AT OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[3], p[6])
@pg.production("expr_function : ASSERT expr SEMI expr_function")
def exprAssert(p): return AssertBox(p[1], p[3])
@pg.production("expr_function : LET binds IN expr_function")
def exprLet(p): return LetBox(p[1], p[3])
@pg.production("expr_function : WITH expr SEMI expr_function")
def exprWith(p): return WithBox(p[1], p[3])
@pg.production("expr_if : IF expr THEN expr ELSE expr")
def exprIf(p): return IfBox(p[1], p[3], p[5])
@pg.production("expr_op : NEGATE expr_op | NOT expr_op")
def exprUnary(p): return ExprUnaryBox(p[1], p[0])
opDict = {
"*": "mul",
"+": "add",
"-": "sub",
}
@pg.production("expr_op : expr_op AND expr_op")
@pg.production("expr_op : expr_op CONCAT expr_op")
@pg.production("expr_op : expr_op DIV expr_op")
@pg.production("expr_op : expr_op EQ expr_op")
@pg.production("expr_op : expr_op GE expr_op")
@pg.production("expr_op : expr_op GEQ expr_op")
@pg.production("expr_op : expr_op IMPL expr_op")
@pg.production("expr_op : expr_op LE expr_op")
@pg.production("expr_op : expr_op LEQ expr_op")
@pg.production("expr_op : expr_op MINUS expr_op")
@pg.production("expr_op : expr_op MUL expr_op")
@pg.production("expr_op : expr_op NEQ expr_op")
@pg.production("expr_op : expr_op OR_OP expr_op")
@pg.production("expr_op : expr_op PLUS expr_op")
@pg.production("expr_op : expr_op UPDATE expr_op")
def exprBinary(p):
return ExprBinaryBox(p[0], p[2], opDict[p[1].getstr()])
@pg.production("expr_op : expr_op HAS attrpath")
def exprHas(p): return HasBox(p[0], p[2])
@pg.production("expr_app : expr_app expr_select")
def exprApp(p): return AppBox(p[0], p[1])
@pg.production("expr_select : expr_simple DOT attrpath | expr_simple DOT attrpath OR expr_select")
def exprSelect(p): return SelectBox(p[0], p[2], p[4] if len(p) == 5 else None)
@pg.production("expr_simple : ID")
def exprSimpleId(p): return VarBox(p[0].getstr())
@pg.production("expr_simple : INT")
def exprSimpleInt(p): return IntBox(int(p[0].getstr()))
@pg.production("expr_simple : STRING")
def exprQuoted(p): return StrBox(trimBox(p[0], 1, -1))
@pg.production("expr_simple : STRING_INIT string_ql")
def stringQL(p):
s = trimBox(p[0], 1, -2)
incomplete = p[1]
assert isinstance(incomplete, IncompleteQL)
return StrQLBox(s, incomplete.pairs)
# XXX should be left-recursive? Requires refactoring all string_ql rules
@pg.production("string_ql : expr STRING_PIECE string_ql")
def stringQLPiece(p):
incomplete = p[2]
assert isinstance(incomplete, IncompleteQL)
pairs = [(p[0], trimBox(p[1], 1, -2))] + incomplete.pairs
return IncompleteQL(pairs)
@pg.production("string_ql : expr STRING_END")
def stringQLEnd(p): return IncompleteQL([(p[0], trimBox(p[1], 1, -1))])
@pg.production("expr_simple : URI")
def exprURI(p): return StrBox(p[0].getstr())
@pg.production("expr_simple : LET OPEN_BRACE binds CLOSE_BRACE")
def exprWeirdLet(p): return WeirdLetBox(p[2])
@pg.production("expr_simple : REC OPEN_BRACE binds CLOSE_BRACE")
def exprRec(p): return RecBox(p[2])
enclosedRule("expr_simple : OPEN_BRACE binds CLOSE_BRACE", 1)
enclosedRule("expr_simple : OPEN_PAREN expr CLOSE_PAREN", 1)
enclosedRule("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK", 1)
constRule("expr_list :", ListBox([]))
@pg.production("expr_list : expr_list expr_select")
def exprListCons(p): return ListBox(p[0].getexprs() + [p[1]])
@pg.production("binds : binds attrpath EQUALS expr SEMI")
def bindsExpr(p): return BindsBox(p[0].getbinds() + [BindExprBox(p[1], p[3])])
@pg.production("binds : binds INHERIT attrs SEMI")
def bindsInherit(p):
return BindsBox(p[0].getbinds() + [BindInheritBox(p[2], None)])
@pg.production("binds : binds INHERIT OPEN_PAREN expr CLOSE_PAREN attrs SEMI")
def bindsInherit(p):
return BindsBox(p[0].getbinds() + [BindInheritBox(p[5], p[3])])
constRule("binds :", BindsBox([]))
@pg.production("formals : formals COMMA formal")
def formalsComma(p):
return FormalsBox(p[0].getparams() + [p[2]], p[0].getellipsis())
@pg.production("formals : formal")
def formalsFormal(p): return FormalsBox([p[0]], False)
# XXX causes a conflict with "binds :"
constRule("formals :", FormalsBox([], False))
constRule("formals : ELLIPSIS", FormalsBox([], True))
@pg.production("formal : ID | ID HAS expr")
def formalId(p): return FormalBox(p[0].getstr(), p[2] if len(p) == 3 else None)
@pg.production("attrs : attrs attr")
def attrsAttr(p): return AttrsBox(p[0].getattrs() + [p[1]])
constRule("attrs :", AttrsBox([]))
@pg.production("attrpath : attrpath DOT attr")
def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[2]])
@pg.production("attrpath : attr")
def attrpathCons(p): return AttrPathBox(p)
@pg.production("attr : ID")
def attrId(p): return StrBox(p[0].getstr())
parser = pg.build()
def printConflict(table, row):
print "Conflict:", row
st = row[0]
print "Action:", table.lr_action[st], "Goto:", table.lr_goto[st], "Reduction:", table.default_reductions[st]
def checkTable(table):
if table.sr_conflicts:
print("Shift/reduce conflicts:")
for row in table.sr_conflicts: printConflict(table, row)
if table.rr_conflicts:
print("Reduce/reduce conflicts:")
for row in table.rr_conflicts: printConflict(table, row)
# assert not table.sr_conflicts and not table.rr_conflicts
checkTable(parser.lr_table)
# Top-level interface: Lex and parse UTF-8 text.
def parse(expr): return parser.parse(lexer.lex(expr))