compiler?

This commit is contained in:
Michael Zhang 2023-09-08 05:10:52 -05:00
parent 6bb860f61a
commit f497f3536a
13 changed files with 412 additions and 19 deletions

28
package-lock.json generated
View file

@ -20,6 +20,7 @@
"katex": "^0.16.8", "katex": "^0.16.8",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"mdast-util-to-string": "^4.0.0", "mdast-util-to-string": "^4.0.0",
"nanoid": "^4.0.2",
"reading-time": "^1.5.0", "reading-time": "^1.5.0",
"rehype-katex": "^6.0.3", "rehype-katex": "^6.0.3",
"remark-emoji": "^4.0.0", "remark-emoji": "^4.0.0",
@ -5976,9 +5977,9 @@
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}, },
"node_modules/nanoid": { "node_modules/nanoid": {
"version": "3.3.6", "version": "4.0.2",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-4.0.2.tgz",
"integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==", "integrity": "sha512-7ZtY5KTCNheRGfEFxnedV5zFiORN1+Y1N6zvPTnHQd8ENUvfaDBeuJDZb2bN/oXwXxu3qkTXDzy57W5vAmDTBw==",
"funding": [ "funding": [
{ {
"type": "github", "type": "github",
@ -5986,10 +5987,10 @@
} }
], ],
"bin": { "bin": {
"nanoid": "bin/nanoid.cjs" "nanoid": "bin/nanoid.js"
}, },
"engines": { "engines": {
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" "node": "^14 || ^16 || >=18"
} }
}, },
"node_modules/napi-build-utils": { "node_modules/napi-build-utils": {
@ -6489,6 +6490,23 @@
"node": "^10 || ^12 || >=14" "node": "^10 || ^12 || >=14"
} }
}, },
"node_modules/postcss/node_modules/nanoid": {
"version": "3.3.6",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz",
"integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/ai"
}
],
"bin": {
"nanoid": "bin/nanoid.cjs"
},
"engines": {
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
}
},
"node_modules/potrace": { "node_modules/potrace": {
"version": "2.1.8", "version": "2.1.8",
"resolved": "https://registry.npmjs.org/potrace/-/potrace-2.1.8.tgz", "resolved": "https://registry.npmjs.org/potrace/-/potrace-2.1.8.tgz",

View file

@ -22,6 +22,7 @@
"katex": "^0.16.8", "katex": "^0.16.8",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"mdast-util-to-string": "^4.0.0", "mdast-util-to-string": "^4.0.0",
"nanoid": "^4.0.2",
"reading-time": "^1.5.0", "reading-time": "^1.5.0",
"rehype-katex": "^6.0.3", "rehype-katex": "^6.0.3",
"remark-emoji": "^4.0.0", "remark-emoji": "^4.0.0",

View file

@ -0,0 +1,7 @@
---
// This component declares no props; its frontmatter only writes to the console.
interface Props {}
const {} = Astro.props;
// NOTE(review): the two console.log calls below print the received props and the
// rendered default slot. They look like leftover debugging output — confirm whether
// they should be removed before this ships.
console.log("SHIET", Astro.props);
console.log("SHIET", await Astro.slots.render("default"));
---

View file

@ -20,10 +20,17 @@ const minDepth = Math.min(...headings.map((heading) => heading.depth));
<h3 class="title">Table of contents</h3> <h3 class="title">Table of contents</h3>
<ul> <ul>
{headings.map((heading) => { {headings.map((heading) => {
const depth = heading.depth - minDepth;
const padding = 10 * Math.pow(0.85, depth);
const fontSize = 14 * Math.pow(0.9, depth);
return ( return (
<li> <li>
<a href={`#${heading.slug}`} id={`${heading.slug}-link`}> <a
<span style={`padding-left: ${(heading.depth - minDepth) * 10}px;`}> href={`#${heading.slug}`}
id={`${heading.slug}-link`}
style={`padding: ${padding}px;`}
>
<span style={`padding-left: ${depth * 10}px; font-size: ${fontSize}px;`}>
{heading.text} {heading.text}
</span> </span>
</a> </a>

View file

@ -0,0 +1,48 @@
---
import { Code } from "astro:components";
interface Props {
code: string;
resultName?: string | string[];
}
let { code, resultName } = Astro.props;
// Dedent: find the smallest indentation shared by all non-blank lines and strip it,
// so snippets can be indented naturally where they are written.
const lines = code.split("\n");
const indents = lines
  .filter((line) => line.trim().length > 0)
  // Measure leading whitespace by length difference instead of a regex match:
  // a match for "one or more whitespace" is null for a line that starts at
  // column 0, which would crash on the property access.
  .map((line) => line.length - line.trimStart().length);
// With no non-blank lines there is nothing to strip.
const commonIndent = indents.length > 0 ? Math.min(...indents) : 0;
code = lines
  .map((line) => (line.trim().length === 0 ? "" : line.substring(commonIndent)))
  .join("\n")
  .trim();

// The script that runs in the browser is the snippet plus a trailing expression naming
// the requested result(s); the client-side script below evals it and reads that value.
let scriptCode = code;
if (typeof resultName === "string") scriptCode += `\n${resultName};`;
else if (Array.isArray(resultName)) scriptCode += `\n[${resultName.join(", ")}];`;
---
<!-- <Code code={code} lang="js" theme="github-dark" /> -->
<Code code={code} lang="js" theme="css-variables" />
<script define:vars={{ resultName, scriptCode }}>
// Run the snippet in the page. `eval?.(...)` is an indirect eval call, so the code is
// evaluated in the global scope rather than in this script's local scope.
const result = eval?.(scriptCode);
// scriptCode ends with `resultName;` (or an array literal of the names), so `result`
// is that value (or an array of values). Publish each one on `window` under its name.
if (typeof resultName === "string") window[resultName] = result;
else if (Array.isArray(resultName)) {
resultName.forEach((name, i) => {
window[name] = result[i];
});
}
</script>

View file

@ -0,0 +1,33 @@
---
import { nanoid } from "nanoid";
import "./style.scss";
interface Props {
label?: string;
id?: string;
runAction: string;
}
const { label, id, runAction } = Astro.props;
const codeId = id ?? nanoid();
// JSON.stringify yields a correctly quoted and escaped JS string literal, so a
// caller-supplied `id` containing quotes or backslashes cannot break the handler source.
const resultIdLiteral = JSON.stringify(codeId);
const stampIdLiteral = JSON.stringify(`${codeId}-stamp`);
// Source of the inline click handler: runs `runAction` with a `displayResult` callback
// in scope; the callback writes the result text and an ISO timestamp into the elements
// rendered below. String(result) is used so that null/undefined results are displayed
// instead of throwing. (The leading `javascript:` is parsed as a statement label inside
// an event handler and has no effect.)
const scriptCode = `
javascript:((displayResult) => {
${runAction}
})((result) => {
const el = document.getElementById(${resultIdLiteral});
el.innerText = String(result);
const stamp = document.getElementById(${stampIdLiteral});
stamp.innerText = new Date().toISOString();
});
`;
---
<button onclick={scriptCode}>{label ?? "Run"}</button>
<div class="result">
<pre id={codeId}></pre>
<small>
Last run:
<span id={`${codeId}-stamp`}></span>
</small>
</div>

View file

@ -0,0 +1,244 @@
---
title: Compiler from scratch
date: 2023-09-08T06:17:00.840Z
tags:
- programming-languages
draft: true
toc: true
---
import CodeBlock from "./CodeBlock.astro";
import Playground from "./Playground.astro";
Just for fun, let's write a compiler that targets WebAssembly.
I'm writing this post as I'm discovering how this works, so join me on my journey!
## Producing a working "binary"
I don't know how WebAssembly actually works, so here are some of the resources I'm
consulting:
- https://developer.mozilla.org/en-US/docs/WebAssembly/Using_the_JavaScript_API
- https://webassembly.github.io/spec/core/index.html
A compiler for a general language is quite an undertaking, so let's start with
the proverbial "Hello, world" program, just to write some output to the screen.
Even this isn't entirely clear, though. First of all, how do we even get output
from WebAssembly?
Well, according to [this document][exported-functions], it looks like you can
essentially mark certain wasm concepts as "exported", and access them from
`obj.instance.exports`. Let's start by trying to export a single number.
[exported-functions]: https://developer.mozilla.org/en-US/docs/WebAssembly/Exported_functions
### Returning a number from WebAssembly
We can use tables to export a number from wasm to JavaScript so we can access it
and print it to the screen. Based on the [MDN example], we can tell that we'll
need to be able to export modules, functions, and tables. We can use the [binary
format spec] to figure out how to produce this info.
[mdn example]: https://github.com/mdn/webassembly-examples/blob/5a2dd7ca5c82d19ae9dd25d170e7ef5e9f23fbb7/js-api-examples/table.wat
[binary format spec]: https://webassembly.github.io/spec/core/binary/index.html
Starting off, a class that we can start writing binary data to:
<CodeBlock
code={`
// A fixed-size byte buffer plus a cursor marking how many bytes have been written
function WasmWriter(size) {
  this.buffer = new ArrayBuffer(size ?? 1024);
  this.cursor = 0;
}
// Helpers for inspecting the bytes written so far: first as a plain array of numbers...
WasmWriter.prototype.asArray = function() {
  const written = this.buffer.slice(0, this.cursor);
  return Array.from(new Uint8Array(written));
};
// ...and then as a printable list of zero-padded hex bytes
WasmWriter.prototype.display = function() {
  const hexBytes = this.asArray().map(byte => byte.toString(16).padStart(2, '0'));
  return "[" + hexBytes.join(", ") + "]";
};
`}
/>
<Playground runAction={`displayResult(new WasmWriter().display());`} />
We'll want to write some stuff into it. Like bytes?
<CodeBlock
code={`
// Serialize obj into the buffer. obj.write receives two callbacks:
//   emit(byte)     appends a single byte at the cursor
//   recurse(child) serializes a nested object in place
WasmWriter.prototype.write = function(obj) {
  obj.write({
    recurse: (child) => this.write(child),
    emit: (byte) => {
      // Grow (double) the buffer when it is full; otherwise the byte would be
      // silently dropped while the cursor kept advancing
      if (this.cursor >= this.buffer.byteLength) {
        const grown = new Uint8Array(Math.max(1, this.buffer.byteLength * 2));
        grown.set(new Uint8Array(this.buffer));
        this.buffer = grown.buffer;
      }
      new Uint8Array(this.buffer)[this.cursor] = byte;
      this.cursor += 1;
    }
  });
};
`}
/>
Or [integers][int spec]? Let's use the [algorithm given on Wikipedia][uleb algo]
here.
[int spec]: https://webassembly.github.io/spec/core/binary/values.html#integers
[uleb algo]: https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128
<CodeBlock
resultName="UInt"
code={`
// Unsigned LEB128: emit the value 7 bits at a time, least-significant group first,
// setting the high bit on every byte except the last one
class UInt {
  constructor(num) { this.num = num; }
  write({ emit }) {
    let num = this.num;
    if (!Number.isSafeInteger(num) || num < 0) {
      throw new RangeError("UInt expects a non-negative integer, got " + num);
    }
    do {
      let byte = num % 128;
      // Divide instead of shifting: >> operates on signed 32-bit integers, so
      // values of 2**31 and above would turn negative and never reach zero
      num = Math.floor(num / 128);
      if (num !== 0) byte = byte | 128;
      emit(byte);
    } while (num !== 0);
  }
}
`}
/>
<Playground
label="Encode some ints"
runAction={`
let ints = [10, 100, 1000, 10000, 100_000];
displayResult(ints.map(x => {
const writer = new WasmWriter();
writer.write(new UInt(x));
return \`\${x} encodes to \${writer.display()}\`;
}).join("\\n"));
`}
/>
Perfect. What do we still need to encode a complete WebAssembly program? Reading
[this][binary modules spec], I guess we'll need functions, tables, and modules.
Let's keep going, starting with [functions][func type spec].
[binary modules spec]: https://webassembly.github.io/spec/core/binary/modules.html#binary-module
[func type spec]: https://webassembly.github.io/spec/core/binary/types.html#binary-functype
<CodeBlock
resultName={["Vec", "ResultType", "NumType", "FuncType"]}
code={`
// A vector is written as its item count (as a UInt) followed by each item in order
class Vec {
  constructor(items) { this.items = items; }
  write({ recurse }) {
    const count = new UInt(this.items.length);
    recurse(count);
    for (const item of this.items) recurse(item);
  }
}
// A result type is written as a vector of its value types
class ResultType {
  constructor(valTypes) { this.valTypes = valTypes; }
  write({ recurse }) {
    const types = new Vec(this.valTypes);
    recurse(types);
  }
}
// Number types are encoded as a single byte
class NumType {
  constructor(type) { this.type = type; }
  write({ emit }) {
    const byte = { "i32": 0x7f, "i64": 0x7e,
                   "f32": 0x7d, "f64": 0x7c }[this.type];
    // Fail loudly on an unknown name: emitting undefined would silently write 0x00
    if (typeof byte !== "number") throw new Error("Unknown number type: " + this.type);
    emit(byte);
  }
}
// Function types: the 0x60 marker byte, then the parameter types, then the return types
class FuncType {
  constructor(rt1, rt2) { this.rt1 = rt1; this.rt2 = rt2; }
  write({ emit, recurse }) {
    emit(0x60);
    for (const resultType of [this.rt1, this.rt2]) recurse(resultType);
  }
}
`}
/>
If you run the example below, you'll see that it prints out what we expect:
- `0x60` designates that it's a function type
- `0x00` means the list of parameter types is empty
- `0x01` means the list of return types has 1 item
- that item is `0x7f`, corresponding to `i32`
<Playground
label="Encode [] -> [i32] function"
runAction={`
const writer = new WasmWriter();
writer.write(new FuncType(
new ResultType([]),
new ResultType([new NumType("i32")]),
));
displayResult(writer.display());
`}
/>
Now, on to [tables][table type spec]:
[table type spec]: https://webassembly.github.io/spec/core/binary/types.html#table-types
<CodeBlock
resultName={["TableType", "RefType", "Limit"]}
code={`
// Table types: the element (reference) type followed by the table's limits
class TableType {
  constructor(et, lim) { this.et = et; this.lim = lim; }
  write({ recurse }) {
    [this.et, this.lim].forEach(part => recurse(part));
  }
}
// Reference types are encoded as a single byte
class RefType {
  constructor(type) { this.type = type; }
  write({ emit }) {
    const byte = {"func": 0x70, "extern": 0x6f}[this.type];
    // Fail loudly on an unknown name: emitting undefined would silently write 0x00
    if (typeof byte !== "number") throw new Error("Unknown reference type: " + this.type);
    emit(byte);
  }
}
// Limits: a flag byte saying whether a maximum is present, then the minimum,
// then the maximum (only when present)
class Limit {
  constructor(min, max) { this.min = min; this.max = max; }
  write({ emit, recurse }) {
    // Compare against undefined/null rather than truthiness so that a maximum of 0 counts
    const hasMax = this.max !== undefined && this.max !== null;
    emit(hasMax ? 0x1 : 0x0);
    recurse(new UInt(this.min));
    if (hasMax) recurse(new UInt(this.max));
  }
}
`}
/>
<Playground
label="Encode a table of functions with limit [1, 5]"
runAction={`
const writer = new WasmWriter();
writer.write(new TableType(
new RefType("func"),
new Limit(1, 5),
));
displayResult(writer.display());
`}
/>
#### Module
Ok, let's put this all together and make a [module][module spec]!
[module spec]: https://webassembly.github.io/spec/core/binary/modules.html#binary-module
<CodeBlock
resultName={["Module"]}
code={`
// A module is written as the magic bytes (0x00 followed by "asm"), a 4-byte
// version number (1), and then each section in order
class Module {
  constructor(sections) { this.sections = sections; }
  write({ emit, recurse }) {
    const magic = [0x00, 0x61, 0x73, 0x6d];
    const version = [0x01, 0x00, 0x00, 0x00];
    for (const byte of [...magic, ...version]) emit(byte);
    this.sections.forEach(section => recurse(section));
  }
}
`}
/>
<Playground
label="Encode a module!!"
runAction={`
const writer = new WasmWriter();
writer.write(new Module([
new FuncType(new ResultType([]), new ResultType([new NumType("i32")])),
new TableType(new RefType("func"), new Limit(0)),
]));
displayResult(writer.display());
`}
/>

View file

@ -0,0 +1,4 @@
// Bordered, padded box for elements with the `result` class.
// NOTE(review): presumably this styles the Playground output panel
// (`<div class="result">`) — confirm. The solid red border looks like placeholder
// styling; check whether it is intended for the published post.
.result {
border: 1px solid red;
padding: 6px;
}

View file

@ -18,7 +18,7 @@ type Props = CollectionEntry<"posts">;
const post = Astro.props; const post = Astro.props;
const { Content, remarkPluginFrontmatter, headings } = await post.render(); const { Content, remarkPluginFrontmatter, headings } = await post.render();
const { title, toc, heroImage: heroImagePath, heroAlt } = post.data; const { title, toc, heroImage: heroImagePath, heroAlt, draft } = post.data;
let heroImage; let heroImage;
if (heroImagePath) { if (heroImagePath) {
@ -30,7 +30,6 @@ const datestamp = post.data.date.toLocaleDateString(undefined, {
day: "numeric", day: "numeric",
}); });
const excerpt = remarkPluginFrontmatter.excerpt?.replaceAll("\n", ""); const excerpt = remarkPluginFrontmatter.excerpt?.replaceAll("\n", "");
console.log("except", excerpt);
--- ---
<BaseLayout title={title} pad={false} toc={toc}> <BaseLayout title={title} pad={false} toc={toc}>
@ -87,13 +86,18 @@ console.log("except", excerpt);
<hr /> <hr />
{
!draft && (
<script <script
src="https://utteranc.es/client.js" src="https://utteranc.es/client.js"
repo="iptq/blog-comments" repo="iptq/blog-comments"
issue-term="og:title" issue-term="og:title"
theme="github-light" theme="github-light"
crossorigin="anonymous" crossorigin="anonymous"
async></script> async
/>
)
}
</div> </div>
</div> </div>
</TocWrapper> </TocWrapper>

View file

@ -17,6 +17,19 @@
--link-hover-color: #{lighten($linkColor, 35%)}; --link-hover-color: #{lighten($linkColor, 35%)};
--code-color: firebrick; --code-color: firebrick;
--tag-color: #{lighten($linkColor, 35%)}; --tag-color: #{lighten($linkColor, 35%)};
// Syntax Highlighting
--astro-code-color-text: #24292e;
--astro-code-color-background: inherit;
--astro-code-token-constant: #005cc5;
--astro-code-token-string: white;
--astro-code-token-comment: #6a737d;
--astro-code-token-keyword: #d73a49;
--astro-code-token-parameter: #24292e;
--astro-code-token-function: var(--astro-code-color-text);
--astro-code-token-string-expression: #032f62;
--astro-code-token-punctuation: #000007;
--astro-code-token-link: white;
} }
} }
@ -39,5 +52,18 @@
--link-hover-color: #{darken($linkColor, 60%)}; --link-hover-color: #{darken($linkColor, 60%)};
--code-color: #{lighten(firebrick, 25%)}; --code-color: #{lighten(firebrick, 25%)};
--tag-color: #{darken($linkColor, 55%)}; --tag-color: #{darken($linkColor, 55%)};
// Syntax Highlighting
--astro-code-color-text: #e1e4e8;
--astro-code-color-background: inherit;
--astro-code-token-constant: #79b8ff;
--astro-code-token-string: #9ecbff;
--astro-code-token-comment: #6a737d;
--astro-code-token-keyword: #f97583;
--astro-code-token-parameter: #e1e4e8;
--astro-code-token-function: var(--astro-code-color-text);
--astro-code-token-string-expression: #85e89d;
--astro-code-token-punctuation: #ffab70;
--astro-code-token-link: black;
} }
} }

View file

@ -1,4 +1,4 @@
$breakpoint: 720px; $breakpoint: 720px;
$tocWidth: 240px; $tocWidth: 320px;
$tocBreakpoint: $breakpoint + $tocWidth; $tocBreakpoint: $breakpoint + $tocWidth;

View file

@ -27,7 +27,7 @@
a { a {
display: block; display: block;
padding: 10px; // padding: 10px;
color: var(--text-color); color: var(--text-color);
border-left: 4px solid transparent; border-left: 4px solid transparent;
border-radius: unset; border-radius: unset;

View file

@ -2,6 +2,7 @@
"extends": "astro/tsconfigs/strictest", "extends": "astro/tsconfigs/strictest",
"compilerOptions": { "compilerOptions": {
"moduleResolution": "Node", "moduleResolution": "Node",
"strictNullChecks": true "strictNullChecks": true,
"jsx": "react-jsx"
} }
} }