137 lines
4.8 KiB
TypeScript
137 lines
4.8 KiB
TypeScript
|
// Parsing combinators
|
||
|
/**
 * Builds a successful parse result.
 *
 * @param r - The value produced by the parser.
 * @param e - The input that is left over after the match.
 * @returns An `"ok"` result carrying `r` as `value` and `e` as `remain`.
 */
const ok = <T>(r: T, e: string): Result<T> => {
  return { status: "ok", value: r, remain: e };
};
|
||
|
/**
 * Builds a failed parse result.
 *
 * @param m - Human-readable failure message.
 * @param e - Optional extra payload stored under `data`; the key is present
 *   (with value `undefined`) even when no payload is given.
 * @returns An `"err"` result.
 */
const err = <T, E>(m: string, e?: E): Result<T> => {
  const failure: Result<T> = { status: "err", msg: m, data: e };
  return failure;
};
|
||
|
/**
 * Adds context to a failed result by prefixing its message with `e`.
 *
 * @param e - Context to put in front of the existing message.
 * @param r - The result to decorate.
 * @returns `r` itself when it is a success; otherwise a copy of `r` whose
 *   message reads `"<e>: <original message>"`.
 */
const wrapErr = <T>(e: string, r: Result<T>): Result<T> => {
  if (r.status === "ok") {
    return r;
  }
  return { ...r, msg: `${e}: ${r.msg}` };
};
|
||
|
|
||
|
/**
 * Outcome of running a parser, discriminated by `status`.
 */
type Result<T> =
  // Success: the parsed value and the input that was not consumed.
  | { status: "ok"; value: T; remain: string }
  // Failure: a message and an optional free-form payload.
  | { status: "err"; msg: string; data?: unknown };
|
||
|
// A parser is a function from an input string to a Result. The type
// parameter is the type of the parsed value and defaults to `unknown`.
type Parser<T = unknown> = (i: string) => Result<T>;
|
||
|
|
||
|
/**
 * Creates a parser that matches `r` at the very start of the input.
 *
 * The pattern is wrapped in a non-capturing group before it is anchored, so
 * the `^` applies to every top-level alternative (`a|b` becomes `^(?:a|b)`
 * rather than `^a|b`, which would let `b` match anywhere). The caller's flags
 * are kept, and the anchored regex is compiled once instead of on every call.
 *
 * @param r - Pattern to match; it does not need to start with `^`.
 * @returns A parser that yields the matched text and the remaining input, or
 *   an error result when the input does not start with a match.
 */
const re = (r: RegExp): Parser<string> => {
  // Drop `g` and `y`: both make `exec` resume from `lastIndex`, which would
  // make the parser's outcome depend on earlier calls.
  const flags = r.flags.replace(/[gy]/g, "");
  const anchored = new RegExp(`^(?:${r.source})`, flags);

  return (i: string) => {
    const m = anchored.exec(i);
    // With the `m` flag `^` can also match after a line break, so insist on
    // a match that starts at offset 0.
    if (!m || m.index !== 0) return err("failed to match " + r.source);
    return ok(m[0], i.slice(m[0].length));
  };
};
|
||
|
/**
 * Transforms the value of a successful parse.
 *
 * @param p - Parser to run.
 * @param f - Function applied to the parsed value when `p` succeeds.
 * @returns A parser that behaves like `p` but yields `f(value)`; failures of
 *   `p` are passed through untouched and `f` is not called.
 */
const map = <T, U>(p: Parser<T>, f: (_: T) => U): Parser<U> => {
  return (i: string) => {
    const parsed = p(i);
    if (parsed.status !== "ok") {
      return parsed;
    }
    const mapped = f(parsed.value);
    return { ...parsed, value: mapped };
  };
};
|
||
|
/**
 * Makes a parser optional.
 *
 * The wrapped parser is invoked exactly once per call: running it a second
 * time to fetch the result would double the work at every level of nesting
 * and repeat any side effects it has.
 *
 * @param p - Parser to attempt.
 * @returns A parser that yields `p`'s result when it succeeds, and otherwise
 *   succeeds with `null` without consuming any input.
 */
const opt = <T>(p: Parser<T>): Parser<T | null> => (i: string) => {
  const res = p(i);
  return res.status === "ok" ? res : ok(null, i);
};
|
||
|
/**
 * Runs parsers one after another, each on the input left by the previous one.
 *
 * @param parsers - Parsers to run in order.
 * @returns A parser yielding the array of all parsed values (one per parser,
 *   in order) and the input left after the last one. On the first failure it
 *   stops and returns that error with `failed seq #<index>: ` prefixed to the
 *   message, where `<index>` is the zero-based position of the failing parser.
 *   With no parsers it succeeds with an empty array and consumes nothing.
 */
const seq = (...parsers: Parser[]): Parser<unknown[]> => (i: string) => {
  const values: unknown[] = [];
  let remain = i;
  for (let idx = 0; idx < parsers.length; idx++) {
    const res = parsers[idx](remain);
    if (res.status === "err") return wrapErr(`failed seq #${idx}`, res);
    // Append in place instead of copying the accumulated array on each step.
    values.push(res.value);
    remain = res.remain;
  }
  return ok(values, remain);
};
|
||
|
/**
 * Tries each parser on the same input and returns the first success.
 *
 * @param parsers - Alternatives, tried in order.
 * @returns A parser yielding the result of the first alternative that
 *   succeeds; later alternatives are not run. When every alternative fails
 *   (or none were given) the error message is `"failed alt"` and `data` holds
 *   the failure messages of the alternatives, in order, so the underlying
 *   causes are not lost.
 */
const alt = (...parsers: Parser[]): Parser => (i: string) => {
  const failures: string[] = [];
  for (const next of parsers) {
    const res = next(i);
    if (res.status === "ok") return res;
    failures.push(res.msg);
  }
  return err("failed alt", failures);
};
|
||
|
|
||
|
// Optional whitespace: the pattern matches zero or more whitespace
// characters, so this parser also succeeds when there is nothing to skip.
const _: Parser = re(/\s*/) // whitespace
|
||
|
|
||
|
// Grammar
|
||
|
/**
 * Identifier: either `_` followed by at least one identifier character, or a
 * letter followed by any number of identifier characters (letters, digits,
 * `_`).
 *
 * Both alternatives live inside one group so that the `^` anchor added by
 * `re` constrains both of them. Left ungrouped, only the first alternative
 * was anchored and the second could match in the middle of the input
 * (e.g. `"123abc"` produced `"abc"`).
 */
const ident: Parser = re(/(?:_[A-Za-z0-9_]+|[A-Za-z][A-Za-z0-9_]*)/);
|
||
|
/**
 * Type name: one of the built-in keywords `uint`, `int`, `ptr`, `str`, or any
 * identifier.
 *
 * Each keyword ends in `\b` so it only matches as a whole word. Without the
 * boundary a name that merely starts with a keyword was split in two
 * (`integer` parsed as `int` leaving `eger`); such names now fall through to
 * the `ident` alternative.
 */
const ty: Parser = alt(
  re(/uint\b/),
  re(/int\b/),
  re(/ptr\b/),
  re(/str\b/),
  ident,
);
|
||
|
|
||
|
/**
 * Leaf expression: a parenthesised expression, an integer literal, or an
 * identifier.
 *
 * - `( expr )` yields the inner expression node itself, so every alternative
 *   produces an expression node rather than the raw
 *   `["(", ws, node, ws, ")"]` array from `seq`.
 * - An integer literal (optional sign, decimal digits) yields
 *   `{ expr: "intLit", value }`.
 * - An identifier yields `{ expr: "ident", name }`.
 */
const exprL: Parser = alt(
  // `expr` is declared further down, so it is looked up lazily at parse time.
  map(seq(re(/\(/), _, (i) => expr(i), _, re(/\)/)), ([, , inner]) => inner),
  map(re(/(\+|\-)?[0-9]+/), (n) => ({ expr: "intLit", value: parseInt(n, 10) })),
  map(ident, (name) => ({ expr: "ident", name })),
);
|
||
|
/**
 * Relational level: `exprL`, optionally followed by `<` or `>` and a second
 * `exprL`.
 *
 * The left operand is parsed once and the operator part is optional. Trying
 * `a < b`, then `a > b`, then plain `a` as separate alternatives re-parsed the
 * left operand up to three times, which compounds exponentially when
 * parenthesised expressions are nested.
 *
 * Yields `{ expr: "<" | ">", left, right }` when an operator and right operand
 * follow, otherwise the left operand unchanged (consuming nothing after it).
 */
const expr2: Parser = map(
  seq(exprL, opt(seq(_, re(/[<>]/), _, exprL))),
  ([left, tail]) => {
    // `tail` is null when no operator/right operand followed.
    if (!Array.isArray(tail)) return left;
    // tail is [whitespace, operator, whitespace, right operand].
    const [, op, , right] = tail;
    return { expr: op, left, right };
  },
);
|
||
|
/**
 * Equality level: `expr2`, optionally followed by `==` and a second `expr2`.
 *
 * The left operand is parsed once and the `== right` part is optional, rather
 * than parsing `a == b` and plain `a` as two alternatives that each re-parse
 * the left operand.
 *
 * Yields `{ expr: "==", left, right }` when the operator and right operand
 * follow, otherwise the left operand unchanged (consuming nothing after it).
 */
const expr1: Parser = map(
  seq(expr2, opt(seq(_, re(/==/), _, expr2))),
  ([left, tail]) => {
    // `tail` is null when no `== right` followed.
    if (!Array.isArray(tail)) return left;
    // tail is [whitespace, "==", whitespace, right operand].
    return { expr: "==", left, right: tail[3] };
  },
);
|
||
|
// Entry point of the expression grammar; currently just an alias for
// `expr1`, the `==` level.
const expr: Parser = expr1
|
||
|
|
||
|
/**
 * Top-level item: `extern <name>`, `struct <name>:` or `fn <name>:`.
 *
 * Every alternative yields `{ type, name }` with `type` being `"extern"`,
 * `"struct"` or `"func"`. The name is the third element of the sequence
 * (keyword, whitespace, identifier); the `extern` case used to pick the
 * second element, i.e. the whitespace. Keywords end in `\b` so that they only
 * match as whole words (`fnfoo:` is not a function named `foo`).
 */
const top: Parser = alt(
  map(seq(re(/extern\b/), _, ident), ([, , name]) => ({ type: "extern", name })),
  map(seq(re(/struct\b/), _, ident, _, re(/:/)), ([, , name]) => ({ type: "struct", name })),
  map(seq(re(/fn\b/), _, ident, _, re(/:/)), ([, , name]) => ({ type: "func", name })),
);
|
||
|
/**
 * Statement inside a function body:
 *
 * - `let <name> = <expr>` yields `{ stmt: "let", name, value }`
 * - `if <expr>:` yields `{ stmt: "if", cond }`
 * - any other expression yields `{ stmt: "expr", expr }`
 *
 * The keywords end in `\b` so they only match as whole words; because the
 * whitespace after a keyword is optional, `letx = 1` would otherwise be read
 * as `let x = 1`.
 */
const stmt: Parser = alt(
  map(
    seq(re(/let\b/), _, ident, _, re(/=/), _, expr),
    ([, , name, , , , value]) => ({ stmt: "let", name, value }),
  ),
  map(seq(re(/if\b/), _, expr, _, re(/:/)), ([, , cond]) => ({ stmt: "if", cond })),
  map(expr, (value) => ({ stmt: "expr", expr: value })),
);
|
||
|
|
||
|
|
||
|
// Parsing driver
|
||
|
/**
 * Result of parsing a whole source file.
 */
interface Program {
  /** Struct entries; the element shape is not constrained beyond `object`. */
  structs: Array<object>;
  /** Function entries; the element shape is not constrained beyond `object`. */
  functions: Array<object>;
}
|
||
|
|
||
|
/** Narrows an untyped top-level parse value to an object with string `type` and `name`. */
function isTopItem(v: unknown): v is { type: string; name: string } {
  if (typeof v !== "object" || v === null) return false;
  const item = v as Record<string, unknown>;
  return typeof item.type === "string" && typeof item.name === "string";
}

/**
 * Parses indentation-structured source text into a `Program`.
 *
 * The input is handled line by line:
 * - Blank (empty or whitespace-only) lines are skipped entirely, so they do
 *   not disturb indentation tracking.
 * - A line at column 0 is parsed with `top`. A `func` item starts a new
 *   function entry `{ name, body }` in `functions`; a `struct` item adds
 *   `{ name }` to `structs`. Any other recognised top-level item ends the
 *   current function. Unrecognised lines are ignored.
 * - An indented line, while a function is open, is parsed with `stmt`; on
 *   success its value is appended to that function's `body`. A statement that
 *   fails to parse is only logged.
 *
 * Indentation levels are tracked on a stack; a dedent pops every level deeper
 * than the current line but never the base level 0. Progress is traced with
 * `console.log`.
 *
 * @param input - Full source text; lines may be separated by `\n` or `\r\n`.
 * @returns The structs and functions collected from the input.
 */
function parseProgram(input: string): Program {
  const program: Program = { structs: [], functions: [] };
  let currentFunc: string | null = null;
  // Statement list of the function currently being filled; shared with the
  // entry already pushed to `program.functions`.
  let currentBody: unknown[] = [];
  const indentStack = [0];
  let expectIndent = false;
  // Built once instead of once per line.
  const topLevel = opt(top);

  for (const line of input.split(/\r?\n/)) {
    const leadingWhitespace = /^(?<space>\s*)(?<rest>.*)$/.exec(line);
    const numSpaces = leadingWhitespace?.groups?.space?.length ?? 0;
    const rest = leadingWhitespace?.groups?.rest ?? "";

    // A blank line has no meaningful indentation; treating it as column 0
    // would wrongly close the open block.
    if (rest === "") continue;

    console.log("stack", indentStack);

    const lastIndent = indentStack[indentStack.length - 1];
    if (numSpaces > lastIndent) {
      if (expectIndent) {
        indentStack.push(numSpaces);
        expectIndent = false;
      }
      // Otherwise the line is indented deeper without a block having been
      // opened; this is tolerated rather than reported.
    } else if (numSpaces === lastIndent) {
      if (expectIndent) {
        console.log("empty block");
      }
    } else {
      // One line can close several nested blocks; keep the base level.
      while (indentStack.length > 1 && indentStack[indentStack.length - 1] > numSpaces) {
        indentStack.pop();
      }
      console.log("dedented");
    }

    if (numSpaces === 0) {
      // Parse top level
      const result = topLevel(rest);
      console.log("top", JSON.stringify(result));
      if (result.status !== "ok" || result.value === null) continue;

      const item: unknown = result.value;
      if (isTopItem(item) && item.type === "func") {
        currentFunc = item.name;
        currentBody = [];
        program.functions.push({ name: item.name, body: currentBody });
        expectIndent = true;
      } else {
        // Any other top-level item ends the current function, so that
        // indented lines below it are not added to that function's body.
        currentFunc = null;
        if (isTopItem(item) && item.type === "struct") {
          program.structs.push({ name: item.name });
        }
      }
    } else if (currentFunc !== null) {
      const result = stmt(rest);
      console.log("stmt", JSON.stringify(rest), JSON.stringify(result));
      if (result.status === "ok") currentBody.push(result.value);
    }
  }

  return program;
}
|
||
|
|
||
|
// Codegen
|
||
|
|
||
|
// Main
|
||
|
/**
 * Script entry point: reads the file named by `Bun.argv[2]` and parses it.
 *
 * @throws Error when no file name was supplied, instead of handing
 *   `undefined` to `Bun.file`.
 */
async function main(): Promise<void> {
  const filename = Bun.argv[2];
  if (!filename) {
    throw new Error("missing input file: expected a source path as the first argument");
  }

  const contents = await Bun.file(filename).text();

  // The parsed program is not consumed yet: the codegen section is still empty.
  parseProgram(contents);
}

main();
|