This commit is contained in:
Michael Zhang 2024-09-14 01:39:00 -05:00
parent e65f457530
commit 8942ee5638
7 changed files with 178 additions and 112 deletions

35
biome.json Normal file
View file

@ -0,0 +1,35 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.0/schema.json",
"vcs": {
"enabled": false,
"clientKind": "git",
"useIgnoreFile": false
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2,
"lineWidth": 240
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "double",
"semicolons": "always",
"trailingCommas": "all",
"bracketSameLine": true
}
}
}

BIN
bun.lockb

Binary file not shown.

View file

@ -3,9 +3,13 @@
"module": "main.ts", "module": "main.ts",
"type": "module", "type": "module",
"devDependencies": { "devDependencies": {
"@biomejs/biome": "^1.9.0",
"@types/bun": "latest" "@types/bun": "latest"
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5.0.0" "typescript": "^5.0.0"
} },
"trustedDependencies": [
"@biomejs/biome"
]
} }

1
src/codegen_x86.ts Normal file
View file

@ -0,0 +1 @@
/**
 * Stub for x86 code generation (purpose inferred from the name — TODO confirm).
 *
 * The body is currently empty: it takes no arguments, does no work and returns nothing.
 */
export function codegen_x86(): void {}

View file

@ -1,12 +1,16 @@
import { parseArgs } from "node:util";
import { parseProgram } from "./parser";

// Main

/**
 * Command-line entry point: reads the source file named on the command line and
 * runs its contents through `parseProgram`.
 *
 * @throws Error when no input file argument was supplied. `parseArgs` runs in
 *   strict mode, so it also throws on options it does not know about.
 */
async function main(): Promise<void> {
  const args = parseArgs({
    args: Bun.argv,
    strict: true,
    allowPositionals: true,
  });

  // The whole of `Bun.argv` is handed to `parseArgs`, so the runtime and script
  // paths occupy positionals 0 and 1; the first user argument is at index 2
  // (the same slot the previous `Bun.argv[2]` lookup used).
  const filename = args.positionals[2];
  if (filename === undefined) {
    throw new Error("missing input file argument");
  }

  const contents = await Bun.file(filename).text();

  // The parse result is not consumed by anything yet.
  parseProgram(contents);
}

// Report failures explicitly instead of leaving the promise floating.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View file

@ -1,136 +1,157 @@
// Parsing combinators

/**
 * Outcome of running a parser: either the parsed value together with the input
 * that was left unconsumed, or an error message with optional extra data.
 */
type Result<T> = { status: "ok"; value: T; remain: string } | { status: "err"; msg: string; data?: unknown };

/** A parser receives the remaining input and reports what it made of its prefix. */
type Parser<T = unknown> = (i: string) => Result<T>;

/** Builds a successful result holding value `r` and the unconsumed input `e`. */
const ok = <T>(r: T, e: string): Result<T> => ({ status: "ok", value: r, remain: e });

/** Builds a failed result with message `m` and optional extra data `e`. */
const err = <T, E>(m: string, e?: E): Result<T> => ({ status: "err", msg: m, data: e });

/** Prefixes the message of a failed result with `e`; successful results pass through unchanged. */
const wrapErr = <T>(e: string, r: Result<T>): Result<T> => (r.status === "ok" ? r : { ...r, msg: `${e}: ${r.msg}` });

/**
 * Parser that matches regular expression `r` at the very start of the input.
 *
 * The pattern is wrapped in a non-capturing group before it is anchored, so the
 * anchor applies to every top-level alternative (a bare `^a|b` would only anchor
 * `a`). The anchored expression is compiled once per `re` call rather than once
 * per parse, and keeps the caller's flags except `g`/`y`, whose `lastIndex`
 * state would make repeated parses depend on each other.
 *
 * @returns on success the matched text as value and the rest of the input as remainder.
 */
const re = (r: RegExp): Parser<string> => {
  const anchored = new RegExp(`^(?:${r.source})`, r.flags.replace(/[gy]/g, ""));
  return (i: string) => {
    const m = anchored.exec(i);
    // With the `m` flag `^` can also match after a line break; only accept offset 0.
    if (!m || m.index !== 0) return err(`failed to match ${r.source}`);
    return ok(m[0], i.slice(m[0].length));
  };
};

/** Transforms the value of a successful parse with `f`; failures are returned as they are. */
const map =
  <T, U>(p: Parser<T>, f: (_: T) => U): Parser<U> =>
  (i: string) => {
    const res = p(i);
    if (res.status === "err") return res;
    return { ...res, value: f(res.value) };
  };

/**
 * Makes `p` optional: when `p` fails the result is a success with value `null`
 * that consumes nothing. `p` is run exactly once per parse.
 */
const opt =
  <T>(p: Parser<T>): Parser<T | null> =>
  (i: string) => {
    const res = p(i);
    return res.status === "ok" ? res : ok(null, i);
  };

/**
 * Runs the parsers one after another, each on what the previous one left over,
 * and collects their values in order. Stops at the first failure, whose message
 * is prefixed with the zero-based position of the failing parser.
 */
const seq =
  (...parsers: Parser[]): Parser<unknown[]> =>
  (i: string) => {
    const values: unknown[] = [];
    let remain = i;
    for (const [idx, parser] of parsers.entries()) {
      const res = parser(remain);
      if (res.status === "err") return wrapErr<unknown[]>(`failed seq #${idx}`, res);
      values.push(res.value);
      remain = res.remain;
    }
    return ok(values, remain);
  };

/**
 * Tries the parsers in order on the same input and returns the first success.
 * When none succeeds (or none was given) the result is the error "failed alt".
 */
const alt =
  (...parsers: Parser[]): Parser =>
  (i: string) => {
    for (const parser of parsers) {
      const res = parser(i);
      if (res.status === "ok") return res;
    }
    return err("failed alt");
  };
// whitespace

/** One or more whitespace characters. */
const __: Parser = re(/\s+/);
/** Zero or more whitespace characters; the pattern also matches the empty string. */
const _: Parser = re(/\s*/);

// Grammar

/** Identifier: an underscore followed by at least one word character, or a letter followed by word characters. */
const ident: Parser = re(/(\_[A-Za-z0-9_]+)|([A-Za-z][A-Za-z0-9_]*)/);

/**
 * Parser for the keyword `s`. It reads a whole identifier first and only
 * succeeds when that identifier equals `s`, so `ifx` is not accepted as `if`.
 */
const kwd =
  (s: string): Parser =>
  (i: string) => {
    const res = ident(i);
    if (res.status === "err") return wrapErr(`expected ${s}`, res);
    if (s !== res.value) return err(`expected ${s}`);
    return res;
  };

/** Type name: one of the built-in type keywords, or any identifier. */
const ty: Parser = alt(kwd("uint"), kwd("int"), kwd("ptr"), kwd("str"), ident);

/**
 * Leaf expression: a parenthesised expression, an optionally signed integer
 * literal, or an identifier. `expr` is referenced through a closure because it
 * is declared further down.
 */
const exprL: Parser = alt(
  seq(re(/\(/), _, (i) => expr(i), _, re(/\)/)),
  // Explicit radix: the literal is always written in decimal.
  map(re(/(\+|\-)?[0-9]+/), (n) => ({ expr: "intLit", value: Number.parseInt(n, 10) })),
  map(ident, (name) => ({ expr: "ident", name })),
);

/** Comparison level: `left < right`, `left > right`, or a plain leaf expression. */
const expr2: Parser = alt(
  map(seq(exprL, _, re(/</), _, exprL), ([left, , , , right]) => ({ expr: "<", left, right })),
  map(seq(exprL, _, re(/>/), _, exprL), ([left, , , , right]) => ({ expr: ">", left, right })),
  exprL,
);

/** Equality level: `left == right`, or a comparison-level expression. */
const expr1: Parser = alt(
  map(seq(expr2, _, re(/==/), _, expr2), ([left, , , , right]) => ({ expr: "==", left, right })),
  expr2,
);

/** Entry point of the expression grammar. */
export const expr: Parser = expr1;

/**
 * Top-level item: `extern NAME`, `struct NAME:` or `fn NAME:`.
 *
 * NOTE(review): the `struct` alternative is not mapped to a tagged node; it
 * yields the raw array produced by `seq`.
 */
export const top: Parser = alt(
  // seq yields [keyword, whitespace, name], so the identifier is at index 2.
  map(seq(kwd("extern"), _, ident), ([, , name]) => ({ type: "extern", name })),
  seq(kwd("struct"), _, ident, _, re(/:/)),
  map(seq(kwd("fn"), _, ident, _, re(/:/)), ([, , name]) => ({ type: "func", name })),
);

/** Statement: `let NAME = EXPR`, `if EXPR:`, or a bare expression. */
export const stmt: Parser = alt(
  map(
    seq(kwd("let"), _, ident, _, re(/=/), _, (i) => expr(i)),
    ([, , name, , , , value]) => ({ stmt: "let", name, value }),
  ),
  map(
    seq(kwd("if"), _, (i) => expr(i), _, re(/:/)),
    ([, , cond]) => ({ stmt: "if", cond }),
  ),
  map(
    (i) => expr(i),
    (expr) => ({ stmt: "expr", expr }),
  ),
);
// Parsing driver

/** Result of parsing a whole source text. */
interface Program {
  structs: object[];
  functions: object[];
}

/** Splits a line into its leading whitespace and everything after it. */
const LINE_PARTS = /^(?<space>\s*)(?<rest>.*)$/;

/** Narrows a parsed top-level item to a function declaration node (`{ type: "func", name }`). */
function isFuncDecl(value: unknown): value is { type: "func"; name: string } {
  return typeof value === "object" && value !== null && "type" in value && value.type === "func" && "name" in value && typeof value.name === "string";
}

/**
 * Parses an indentation-structured program line by line.
 *
 * Lines without indentation are parsed as top-level items; indented lines that
 * follow a function header are parsed as statements of that function.
 * Diagnostic output is written with `console.log` while parsing.
 *
 * @param input complete source text; lines may end in `\n` or `\r\n`.
 * @returns the collected program. Every entry of `functions` has the shape
 *   `{ name, body }`, where `body` holds the successfully parsed statement
 *   nodes in source order. `structs` is returned empty: nothing in this driver
 *   populates it yet.
 */
export function parseProgram(input: string): Program {
  const program: Program = { structs: [], functions: [] };
  let currentFunc: { name: string; body: unknown[] } | null = null;
  const indentStack = [0];
  let expectIndent = false;

  for (const line of input.split(/\r?\n/)) {
    const leadingWhitespace = LINE_PARTS.exec(line);
    const numSpaces = leadingWhitespace?.groups?.space?.length ?? 0;
    const rest = leadingWhitespace?.groups?.rest ?? "";

    // Blank and whitespace-only lines say nothing about block structure; without
    // this guard an empty line inside a body would count as a dedent.
    if (rest === "") continue;

    console.log("stack", indentStack);

    const lastIndent = indentStack[indentStack.length - 1] ?? 0;
    if (numSpaces > lastIndent) {
      if (expectIndent) {
        indentStack.push(numSpaces);
        expectIndent = false;
      } else {
        // Possible error? Deeper indentation without a block opener is tolerated for now.
      }
    } else if (numSpaces === lastIndent) {
      if (expectIndent) {
        console.log("empty block");
        // The block that was announced turned out to be empty; stop waiting for it.
        expectIndent = false;
      }
    } else {
      // A single line may close several nested blocks at once; the base level stays on the stack.
      while (indentStack.length > 1 && (indentStack[indentStack.length - 1] ?? 0) > numSpaces) {
        indentStack.pop();
      }
      console.log("dedented");
    }

    if (numSpaces === 0) {
      // Parse top level. A new top-level item ends the function collected so far.
      currentFunc = null;
      const result = opt(top)(rest);
      console.log("top", JSON.stringify(result));
      if (result.status === "ok" && isFuncDecl(result.value)) {
        currentFunc = { name: result.value.name, body: [] };
        program.functions.push(currentFunc);
        expectIndent = true;
      }
    } else if (currentFunc) {
      const result = stmt(rest);
      console.log("stmt", JSON.stringify(rest), JSON.stringify(result));
      if (result.status === "ok") currentFunc.body.push(result.value);
    }
  }

  return program;
}
// Codegen

View file

@ -1,8 +1,9 @@
import {test,expect} from "bun:test" import { test, expect } from "bun:test";
import { stmt } from "../src/parser" import { stmt } from "../src/parser";
test("ifx", () => expect(stmt("ifx")).toMatchObject({status:"ok", value: {expr: {}}, remain: ""})) test("ifx", () => expect(stmt("ifx")).toMatchObject({ status: "ok", value: { expr: {} }, remain: "" }));
test("ifx:", () => expect(stmt("ifx:")).toMatchObject({status:"ok", value: {expr: {}}, remain: ":"})) test("ifx:", () => expect(stmt("ifx:")).toMatchObject({ status: "ok", value: { expr: {} }, remain: ":" }));
test("if x:", () => expect(stmt("if x:")).toMatchObject({status:"ok", value: {stmt: "if"}, remain: ""})) test("if x:", () => expect(stmt("if x:")).toMatchObject({ status: "ok", value: { stmt: "if" }, remain: "" }));
test("if(x):", () => expect(stmt("if(x):")).toMatchObject({status:"ok", value: {stmt: "if"}, remain: ""})) test("if(x):", () => expect(stmt("if(x):")).toMatchObject({ status: "ok", value: { stmt: "if" }, remain: "" }));
test("let let = let", () => expect(stmt("let let = let")).toMatchObject({status:"ok", value: {stmt: "let"}, remain: ""}))
// test("let let = let", () => expect(stmt("let let = let")).toMatchObject({status:"ok", value: {stmt: "let"}, remain: ""}))