// Parsing combinators

/** Successful parse: the produced value plus the input that is still unconsumed. */
type Success<T> = { status: "ok"; value: T; remain: string };

/** Failed parse: a human-readable message and an optional diagnostic payload. */
type Failure = { status: "err"; msg: string; data?: unknown };

/** Outcome of running a parser, discriminated on `status`. */
type Result<T> = Success<T> | Failure;

/** A parser consumes a prefix of `i` and reports the value plus the remainder. */
type Parser<T> = (i: string) => Result<T>;

/** Maps a tuple of parsers to the tuple of values they produce. */
type ParsedValues<P extends readonly Parser<unknown>[]> = {
  [K in keyof P]: P[K] extends Parser<infer V> ? V : never;
};

/** Builds a successful result carrying value `r` and remaining input `e`. */
const ok = <T>(r: T, e: string): Result<T> => ({ status: "ok", value: r, remain: e });

/** Builds a failed result with message `m` and optional payload `e`. */
const err = (m: string, e?: unknown): Failure => ({ status: "err", msg: m, data: e });

/** Prefixes the message of a failed result with `e`; successful results pass through. */
const wrapErr = <T>(e: string, r: Result<T>): Result<T> =>
  r.status === "ok" ? r : { ...r, msg: `${e}: ${r.msg}` };

/**
 * Parser matching regular expression `r` at the very start of the input.
 *
 * The pattern is wrapped in a non-capturing group before `^` is prepended so
 * that every alternative of a top-level `a|b` is anchored, not just the first.
 * The `g`/`y` flags are dropped because they would make `exec` stateful.
 */
const re = (r: RegExp): Parser<string> => {
  const anchored = new RegExp(`^(?:${r.source})`, r.flags.replace(/[gy]/g, ""));
  return (i: string) => {
    const m = anchored.exec(i);
    if (!m) return err("failed to match " + r.source);
    return ok(m[0], i.slice(m[0].length));
  };
};

/** Transforms the value of a successful parse with `f`; failures pass through. */
const map = <T, U>(p: Parser<T>, f: (_: T) => U): Parser<U> => (i: string) => {
  const res = p(i);
  return res.status === "ok" ? { ...res, value: f(res.value) } : res;
};

/** Makes `p` optional: on failure it yields `null` and consumes nothing. */
const opt = <T>(p: Parser<T>): Parser<T | null> => (i: string) => {
  // Run the wrapped parser exactly once and reuse its result.
  const res = p(i);
  return res.status === "ok" ? res : ok(null, i);
};

/**
 * Runs `parsers` one after another, each on the remainder left by the previous
 * one, and yields the tuple of their values. The first failure aborts the
 * sequence and is reported with the index of the failing parser.
 */
const seq = <P extends Parser<unknown>[]>(...parsers: P): Parser<ParsedValues<P>> =>
  (i: string) => {
    const values: unknown[] = [];
    let remain = i;
    for (const [idx, next] of parsers.entries()) {
      const res = next(remain);
      if (res.status === "err") {
        return wrapErr<ParsedValues<P>>(`failed seq #${idx}`, res);
      }
      values.push(res.value);
      remain = res.remain;
    }
    // `values` holds one entry per parser, in order, which is what ParsedValues<P> describes.
    return ok(values as ParsedValues<P>, remain);
  };

/**
 * Tries `parsers` in order on the same input and returns the first success.
 * When all of them fail the result is "failed alt", with the individual
 * failures attached as `data` for diagnostics.
 */
const alt = <T>(...parsers: Parser<T>[]): Parser<T> => (i: string) => {
  const failures: Failure[] = [];
  for (const next of parsers) {
    const res = next(i);
    if (res.status === "ok") return res;
    failures.push(res);
  }
  return err("failed alt", failures);
};

const _: Parser<string> = re(/\s*/); // whitespace

// Grammar

/** Expression nodes produced by the expression parsers. */
type Expr =
  | { expr: "intLit"; value: number }
  | { expr: "ident"; name: string }
  | { expr: "<" | ">" | "=="; left: Expr; right: Expr };

/** Top-level declarations recognised by `top`. */
type TopLevel = { type: "extern" | "struct" | "func"; name: string };

/** Statements recognised by `stmt`. */
type Stmt =
  | { stmt: "let"; name: string; value: Expr }
  | { stmt: "if"; cond: Expr }
  | { stmt: "expr"; expr: Expr };

const ident: Parser<string> = re(/(\_[A-Za-z0-9_]+)|([A-Za-z][A-Za-z0-9_]*)/);

const ty: Parser<string> = alt(
  re(/uint/),
  re(/int/),
  re(/ptr/),
  re(/str/),
  ident,
);

// `expr` is declared further down; deferring the lookup to call time allows the recursion.
const lazyExpr: Parser<Expr> = (i: string) => expr(i);

/** Primary expressions: parenthesised expression, integer literal or identifier. */
const exprL: Parser<Expr> = alt(
  // Unwrap the parentheses so callers always receive an expression node.
  map(seq(re(/\(/), _, lazyExpr, _, re(/\)/)), ([, , inner]): Expr => inner),
  map(re(/(\+|\-)?[0-9]+/), (n): Expr => ({ expr: "intLit", value: parseInt(n, 10) })),
  map(ident, (name): Expr => ({ expr: "ident", name })),
);

/** Relational expressions: `a < b`, `a > b`, or a primary expression. */
const expr2: Parser<Expr> = alt(
  map(seq(exprL, _, re(/</), _, exprL), ([left, , , , right]): Expr =>
    ({ expr: "<", left, right })),
  map(seq(exprL, _, re(/>/), _, exprL), ([left, , , , right]): Expr =>
    ({ expr: ">", left, right })),
  exprL,
);

/** Equality expressions: `a == b`, or a relational expression. */
const expr1: Parser<Expr> = alt(
  map(seq(expr2, _, re(/==/), _, expr2), ([left, , , , right]): Expr =>
    ({ expr: "==", left, right })),
  expr2,
);

const expr: Parser<Expr> = expr1;

/** Top-level declarations: `extern NAME`, `struct NAME:` and `fn NAME:`. */
const top: Parser<TopLevel> = alt(
  // Index 2 is the identifier; index 1 is the whitespace after the keyword.
  map(seq(re(/extern/), _, ident), ([, , name]): TopLevel => ({ type: "extern", name })),
  map(seq(re(/struct/), _, ident, _, re(/:/)), ([, , name]): TopLevel =>
    ({ type: "struct", name })),
  map(seq(re(/fn/), _, ident, _, re(/:/)), ([, , name]): TopLevel =>
    ({ type: "func", name })),
);

/** Statements: `let NAME = EXPR`, `if EXPR:` or a bare expression. */
const stmt: Parser<Stmt> = alt(
  map(seq(re(/let/), _, ident, _, re(/=/), _, lazyExpr), ([, , name, , , , value]): Stmt =>
    ({ stmt: "let", name, value })),
  map(seq(re(/if/), _, lazyExpr, _, re(/:/)), ([, , cond]): Stmt => ({ stmt: "if", cond })),
  map(lazyExpr, (value): Stmt => ({ stmt: "expr", expr: value })),
);

// Parsing driver

interface Program {
  structs: object[]
  functions: object[]
}

/** A function declaration together with the statements parsed from its body. */
type FuncDecl = { type: "func"; name: string; body: Stmt[] };

/**
 * Parses a whole source text line by line.
 *
 * Lines at column 0 are parsed with `top`; indented lines following a `fn`
 * declaration are parsed with `stmt` and appended to that function's body.
 * Blank lines are skipped. Lines that fail to parse are logged and otherwise
 * ignored.
 *
 * @param input Complete source text; lines may end in `\n` or `\r\n`.
 * @returns The struct declarations and the functions (with their statements) found.
 */
function parseProgram(input: string): Program {
  const program: Program = { structs: [], functions: [] };
  const linePattern = /^(?<space>\s*)(?<rest>.*)$/;
  const topOrNothing = opt(top);
  const indentStack = [0];
  let currentFunc: FuncDecl | null = null;
  let expectIndent = false;

  for (const line of input.split(/\r?\n/)) {
    const leadingWhitespace = linePattern.exec(line);
    const numSpaces = leadingWhitespace?.groups?.space?.length ?? 0;
    const rest = leadingWhitespace?.groups?.rest ?? "";

    // Blank lines carry no indentation information and must not touch the indent stack.
    if (rest === "") continue;

    console.log("stack", indentStack);
    const lastIndent = indentStack[indentStack.length - 1] ?? 0;
    if (numSpaces > lastIndent) {
      if (expectIndent) {
        indentStack.push(numSpaces);
        expectIndent = false;
      } else {
        // Possible error?
      }
    } else {
      if (expectIndent) {
        console.log("empty block");
        // The block that was announced never opened; stop waiting for it.
        expectIndent = false;
      }
      if (numSpaces < lastIndent) {
        // A single line may close several nested blocks at once.
        while ((indentStack[indentStack.length - 1] ?? 0) > numSpaces) {
          indentStack.pop();
        }
        console.log("dedented");
      }
    }

    if (numSpaces === 0) {
      // Parse top level
      const result = topOrNothing(rest);
      console.log("top", JSON.stringify(result));
      // Any line at column 0 ends the function that was being collected.
      currentFunc = null;
      if (result.status !== "ok" || result.value === null) continue;
      switch (result.value.type) {
        case "func":
          currentFunc = { type: "func", name: result.value.name, body: [] };
          program.functions.push(currentFunc);
          expectIndent = true;
          break;
        case "struct":
          program.structs.push(result.value);
          break;
        case "extern":
          // Program has no slot for extern declarations; they are only logged above.
          break;
      }
    } else if (currentFunc) {
      const result = stmt(rest);
      console.log("stmt", JSON.stringify(rest), JSON.stringify(result));
      if (result.status === "ok") currentFunc.body.push(result.value);
    }
  }

  return program;
}

// Codegen

// Main

/** Reads the source file named by the first command-line argument and parses it. */
async function main(): Promise<void> {
  const filename = Bun.argv[2];
  if (filename === undefined) {
    console.error("missing input file argument");
    return;
  }
  const contents = await Bun.file(filename).text();
  // The result is not consumed yet: the codegen section above is still empty.
  parseProgram(contents);
}

// Only start the command-line entry point under Bun, so the parsers above can be
// loaded by other runtimes without crashing on the missing `Bun` global.
if (typeof Bun !== "undefined") {
  // Left un-caught on purpose so the runtime reports any failure itself.
  void main();
}