produce a parse tree
This commit is contained in:
parent
6e7563c84c
commit
500520b466
6 changed files with 119 additions and 23 deletions
7
Justfile
7
Justfile
|
@ -1,2 +1,7 @@
|
||||||
watch:
|
watch:
|
||||||
watchexec -ce py,lark,ag -i gen 'mypy *.py && python agmain.py && mypy gen/*.py'
|
watchexec -ce py,lark,ag -i gen 'just run'
|
||||||
|
|
||||||
|
run:
|
||||||
|
mypy *.py
|
||||||
|
python agmain.py
|
||||||
|
mypy gen/*.py
|
||||||
|
|
33
agast.py
33
agast.py
|
@ -1,5 +1,27 @@
|
||||||
from typing import *
|
from typing import *
|
||||||
from lark import Transformer, Tree, Token
|
from lark import Transformer, Tree, Token
|
||||||
|
import re
|
||||||
|
from re import Pattern
|
||||||
|
|
||||||
|
def unescape(s: str) -> str:
|
||||||
|
q = s[0]
|
||||||
|
t = ""
|
||||||
|
i = 1
|
||||||
|
escaped = False
|
||||||
|
while i < len(s):
|
||||||
|
c = s[i]
|
||||||
|
if escaped:
|
||||||
|
if c == q: t += q
|
||||||
|
elif c == 'n': t += '\n'
|
||||||
|
elif c == 't': t += '\t'
|
||||||
|
if c == q: break
|
||||||
|
if c == '\\':
|
||||||
|
escaped = True
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
t += c
|
||||||
|
i += 1
|
||||||
|
return t
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
@ -40,8 +62,16 @@ class NodeRefByName(NodeRef, str):
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str):
|
||||||
self.name = name
|
self.name = name
|
||||||
def __repr__(self) -> str: return f"NodeRefByName({self.name})"
|
def __repr__(self) -> str: return f"NodeRefByName({self.name})"
|
||||||
|
class NodeRegex(NodeRef):
|
||||||
|
def __init__(self, pat: str):
|
||||||
|
self.pat = re.compile(unescape(pat))
|
||||||
|
def __repr__(self) -> str: return f"NodeRegex({self.pat.pattern})"
|
||||||
|
|
||||||
class Sym: pass
|
class Sym: pass
|
||||||
|
class SymLit(Sym):
|
||||||
|
def __init__(self, s: str):
|
||||||
|
self.lit = unescape(s)
|
||||||
|
def __repr__(self) -> str: return f"SymLit({repr(self.lit)})"
|
||||||
class SymRename(Sym):
|
class SymRename(Sym):
|
||||||
def __init__(self, name: str, ty: NodeRef):
|
def __init__(self, name: str, ty: NodeRef):
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -113,6 +143,7 @@ class Parser(Transformer[List[Decl]]):
|
||||||
[name, ifaces, variants] = items
|
[name, ifaces, variants] = items
|
||||||
return Node(name, ifaces, variants)
|
return Node(name, ifaces, variants)
|
||||||
def node_ref_name(self, items: List[str]) -> NodeRefByName: return NodeRefByName(items[0])
|
def node_ref_name(self, items: List[str]) -> NodeRefByName: return NodeRefByName(items[0])
|
||||||
|
def node_regex(self, items: List[str]) -> NodeRegex: return NodeRegex(items[0])
|
||||||
|
|
||||||
# variants
|
# variants
|
||||||
def variants(self, items: List[Variant]) -> List[Variant]: return items
|
def variants(self, items: List[Variant]) -> List[Variant]: return items
|
||||||
|
@ -121,6 +152,8 @@ class Parser(Transformer[List[Decl]]):
|
||||||
return Variant(prod, equations)
|
return Variant(prod, equations)
|
||||||
def prod(self, items: List[Sym]) -> List[Sym]: return items
|
def prod(self, items: List[Sym]) -> List[Sym]: return items
|
||||||
|
|
||||||
|
# symbols in productions
|
||||||
|
def sym_lit(self, items: List[str]) -> Sym: return SymLit(items[0])
|
||||||
def sym_rename(self, items: List[Any]) -> Sym: return SymRename(items[0], items[1])
|
def sym_rename(self, items: List[Any]) -> Sym: return SymRename(items[0], items[1])
|
||||||
|
|
||||||
# equations
|
# equations
|
||||||
|
|
72
aggen.py
72
aggen.py
|
@ -2,6 +2,9 @@ from typing import *
|
||||||
import textwrap
|
import textwrap
|
||||||
import re
|
import re
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
|
from collections import defaultdict
|
||||||
|
from re import Pattern
|
||||||
|
|
||||||
from agast import *
|
from agast import *
|
||||||
|
|
||||||
|
@ -10,17 +13,33 @@ i = 0
|
||||||
|
|
||||||
class GenResult:
|
class GenResult:
|
||||||
def __init__(self, pd: str = "", ex: str = ""):
|
def __init__(self, pd: str = "", ex: str = ""):
|
||||||
self.parser_data = pd
|
self.literals: Dict[str, str] = dict()
|
||||||
|
self.parse_rules: defaultdict[str, List[str]] = defaultdict(list)
|
||||||
|
self.starts: Set[str] = set()
|
||||||
self.extra = ex
|
self.extra = ex
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parser_data(self) -> str:
|
||||||
|
s = []
|
||||||
|
for sym, pat in self.literals.items():
|
||||||
|
s.append(f"{sym}: {pat}")
|
||||||
|
for name, rules in self.parse_rules.items():
|
||||||
|
srules = " | ".join(rules)
|
||||||
|
s.append(f"{name}: {srules}")
|
||||||
|
s.append("%import common.WS")
|
||||||
|
s.append("%ignore WS")
|
||||||
|
return "\n".join(s)
|
||||||
|
|
||||||
def gen(program: List[Decl]) -> GenResult:
|
def gen(program: List[Decl]) -> GenResult:
|
||||||
res = GenResult()
|
res = GenResult()
|
||||||
|
|
||||||
def gen(prefix: str = "", suffix: str = "") -> str:
|
def gen(prefix: str = "", suffix: str = "") -> str:
|
||||||
global i
|
global i
|
||||||
presan = re.sub("[^0-9a-zA-Z]+", "_", prefix)
|
presan = re.sub("[^0-9a-zA-Z]+", "_", prefix)
|
||||||
sufsan = re.sub("[^0-9a-zA-Z]+", "_", suffix)
|
sufsan = re.sub("[^0-9a-zA-Z]+", "_", suffix)
|
||||||
i += 1
|
i += 1
|
||||||
return f"{presan}{i}{sufsan}"
|
return f"{presan}{i}{sufsan}"
|
||||||
|
|
||||||
def v(name: str) -> str:
|
def v(name: str) -> str:
|
||||||
return f"__ag_{name}"
|
return f"__ag_{name}"
|
||||||
|
|
||||||
|
@ -83,27 +102,62 @@ def gen(program: List[Decl]) -> GenResult:
|
||||||
return dict()
|
return dict()
|
||||||
raise Exception(f"unhandled {expr.__class__}")
|
raise Exception(f"unhandled {expr.__class__}")
|
||||||
|
|
||||||
for node in filter(lambda c: isinstance(c, Node), program):
|
node_map = dict(map(lambda n: (n.name, n), filter(lambda c: isinstance(c, Node), program)))
|
||||||
|
node_name_map = dict(map(lambda n: (n[0], gen(n[1].name.lower())), node_map.items()))
|
||||||
|
|
||||||
|
for node in node_map.values():
|
||||||
node = cast(Node, node)
|
node = cast(Node, node)
|
||||||
n_class_name = gen(node.name)
|
node_name_lower = node.name.lower()
|
||||||
|
node_name = node_name_map[node.name]
|
||||||
|
res.parse_rules[f"?{node_name_lower}"].append(node_name)
|
||||||
|
res.starts.add(node_name_lower)
|
||||||
|
|
||||||
class_decl = textwrap.dedent(f"""
|
class_decl = textwrap.dedent(f"""
|
||||||
class {v(n_class_name)}: pass
|
class {v(node_name)}: pass
|
||||||
""")
|
""")
|
||||||
res.extra += class_decl
|
res.extra += class_decl
|
||||||
|
|
||||||
print(node.name, node.ifaces)
|
print(node.name, node.ifaces)
|
||||||
|
|
||||||
for variant in node.variants:
|
for variant in node.variants:
|
||||||
v_class_name = gen(f"{n_class_name}_var")
|
v_class_name = gen(f"{node_name}_var")
|
||||||
class_decl = textwrap.dedent(f"""
|
class_decl = textwrap.dedent(f"""
|
||||||
class {v(v_class_name)}({v(n_class_name)}):
|
class {v(v_class_name)}({v(node_name)}):
|
||||||
''' '''
|
''' '''
|
||||||
pass
|
pass
|
||||||
""")
|
""")
|
||||||
res.extra += class_decl
|
res.extra += class_decl
|
||||||
|
|
||||||
prod_name = gen(node.name)
|
prod_name = gen(node_name + "_")
|
||||||
print(prod_name)
|
res.parse_rules[node_name].append(prod_name)
|
||||||
|
print("PRODUCTION", prod_name, variant.prod)
|
||||||
|
|
||||||
|
# resolving a production just means checking to make sure it's a
|
||||||
|
# type that exists or it's a regex
|
||||||
|
def resolve_production(sym: Sym) -> str:
|
||||||
|
print(f"resolve_production({sym})")
|
||||||
|
if isinstance(sym, SymRename):
|
||||||
|
if isinstance(sym.ty, NodeRefByName):
|
||||||
|
if sym.ty.name in node_name_map:
|
||||||
|
return node_name_map[sym.ty.name]
|
||||||
|
else:
|
||||||
|
raise Exception(f"unresolved name {sym.ty.name} in production")
|
||||||
|
elif isinstance(sym.ty, NodeRegex):
|
||||||
|
sym_name = gen("sym")
|
||||||
|
res.literals[sym_name] = f"/{sym.ty.pat.pattern}/"
|
||||||
|
return sym_name
|
||||||
|
elif isinstance(sym, SymLit):
|
||||||
|
sym_name = gen("lit")
|
||||||
|
# hack to make repr have double quotes
|
||||||
|
res.literals[sym_name] = json.dumps(sym.lit)
|
||||||
|
return sym_name
|
||||||
|
raise Exception(f"unhandled {sym.__class__}")
|
||||||
|
|
||||||
|
seq = []
|
||||||
|
for sym in variant.prod:
|
||||||
|
n = resolve_production(sym)
|
||||||
|
seq.append(n)
|
||||||
|
res.parse_rules[prod_name].append(" ".join(seq))
|
||||||
|
|
||||||
# create an environment for checking the equations based on
|
# create an environment for checking the equations based on
|
||||||
# the production
|
# the production
|
||||||
|
@ -135,6 +189,6 @@ def gen(program: List[Decl]) -> GenResult:
|
||||||
|
|
||||||
# this is a "type alias" that connects it to one of the generated
|
# this is a "type alias" that connects it to one of the generated
|
||||||
# names above
|
# names above
|
||||||
res.extra += f"{node.name} = {v(n_class_name)}"
|
res.extra += f"{node.name} = {v(node_name)}"
|
||||||
|
|
||||||
return res
|
return res
|
12
agmain.py
12
agmain.py
|
@ -41,15 +41,15 @@ if __name__ == "__main__":
|
||||||
if self.value is None:
|
if self.value is None:
|
||||||
self.value = self.func()
|
self.value = self.func()
|
||||||
return self.value
|
return self.value
|
||||||
parser = Lark('''start:
|
parser = Lark('''{pd}''', parser='lalr', start={starts}, debug=True)
|
||||||
{pd}''')
|
|
||||||
class Trans(Transformer[None]):
|
class Trans(Transformer[None]):
|
||||||
pass
|
pass
|
||||||
{ex}
|
{ex}
|
||||||
def parse(input: str) -> None:
|
def parse(input: str, start: Optional[str] = None) -> Any:
|
||||||
print(input)
|
return parser.parse(input, start)
|
||||||
""")
|
""")
|
||||||
f.write(fmt_str.format(pd=res.parser_data, ex=res.extra))
|
f.write(fmt_str.format(pd=res.parser_data, ex=res.extra, starts=list(res.starts)))
|
||||||
|
|
||||||
mod = importlib.import_module("gen.arith")
|
mod = importlib.import_module("gen.arith")
|
||||||
mod.parse("1 + 2 * 3") # type: ignore
|
print(mod.parse("1 + 2 * 3", start="expr")) # type: ignore
|
||||||
|
|
||||||
|
|
2
arith.ag
2
arith.ag
|
@ -9,5 +9,5 @@ node Expr : HasValue {
|
||||||
<l:Expr> "*" <r:Expr> => {
|
<l:Expr> "*" <r:Expr> => {
|
||||||
self.val = l.val * r.val;
|
self.val = l.val * r.val;
|
||||||
}
|
}
|
||||||
<n:r"[0-9]+"> => { self.val = parseInt(n); }
|
<n:"[0-9]+"> => { self.val = parseInt(n); }
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,11 +18,13 @@ variants: variant*
|
||||||
variant: prod "=>" "{" equations "}"
|
variant: prod "=>" "{" equations "}"
|
||||||
prod: sym*
|
prod: sym*
|
||||||
?sym: sym_rename
|
?sym: sym_rename
|
||||||
| STRING
|
| sym_lit
|
||||||
|
sym_lit: ESCAPED_STRING
|
||||||
sym_rename: "<" ident ":" node_ref ">"
|
sym_rename: "<" ident ":" node_ref ">"
|
||||||
?node_ref: node_ref_name
|
?node_ref: node_ref_name
|
||||||
| STRING
|
| node_regex
|
||||||
node_ref_name: ident
|
node_ref_name: ident
|
||||||
|
node_regex: ESCAPED_STRING
|
||||||
equations: equation_semi*
|
equations: equation_semi*
|
||||||
equation_semi: equation ";"
|
equation_semi: equation ";"
|
||||||
// TODO: the left side should really be a separate type
|
// TODO: the left side should really be a separate type
|
||||||
|
@ -56,5 +58,7 @@ IDENT: /([a-zA-Z][a-zA-Z0-9_]*)|(_[a-zA-Z0-9_]+)/
|
||||||
|
|
||||||
%import python.STRING
|
%import python.STRING
|
||||||
%import common.WS
|
%import common.WS
|
||||||
|
%import common.ESCAPED_STRING
|
||||||
|
|
||||||
%ignore WS
|
%ignore WS
|
||||||
%ignore COMMENT
|
%ignore COMMENT
|
Loading…
Reference in a new issue