This commit is contained in:
parent
0627d6b0d9
commit
b47c8ffae3
6 changed files with 331 additions and 14 deletions
|
@ -6,26 +6,36 @@ import { sortBy } from "lodash-es";
|
||||||
interface Props {
|
interface Props {
|
||||||
basePath: string;
|
basePath: string;
|
||||||
drafts?: "exclude" | "include" | "only";
|
drafts?: "exclude" | "include" | "only";
|
||||||
|
filteredPosts?: Post[];
|
||||||
}
|
}
|
||||||
|
|
||||||
type Post = CollectionEntry<"posts">;
|
type Post = CollectionEntry<"posts">;
|
||||||
const { basePath, drafts: includeDrafts } = Astro.props;
|
const { basePath, drafts: includeDrafts, filteredPosts } = Astro.props;
|
||||||
|
|
||||||
let filter;
|
type FilterFn = (_: Post) => boolean;
|
||||||
switch (includeDrafts) {
|
|
||||||
case "exclude":
|
function unreachable(): never {
|
||||||
case undefined:
|
throw new Error("unreachable");
|
||||||
filter = (post: Post) => !post.data.draft;
|
|
||||||
break;
|
|
||||||
case "include":
|
|
||||||
filter = (_: Post) => true;
|
|
||||||
break;
|
|
||||||
case "only":
|
|
||||||
filter = (post: Post) => post.data.draft === true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const allPosts = await getCollection("posts", filter);
|
function getFilter(): FilterFn {
|
||||||
|
switch (includeDrafts) {
|
||||||
|
case "exclude":
|
||||||
|
case undefined:
|
||||||
|
return (post: Post) => !post.data.draft;
|
||||||
|
case "include":
|
||||||
|
return (_: Post) => true;
|
||||||
|
case "only":
|
||||||
|
return (post: Post) => post.data.draft === true;
|
||||||
|
}
|
||||||
|
return unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
const filter = getFilter();
|
||||||
|
let allPosts;
|
||||||
|
if (filteredPosts) allPosts = filteredPosts.filter(filter);
|
||||||
|
else allPosts = await getCollection("posts", filter);
|
||||||
|
|
||||||
const sortedPosts = sortBy(allPosts, (post) => -post.data.date);
|
const sortedPosts = sortBy(allPosts, (post) => -post.data.date);
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
215
src/content/posts/2023-09-01-formal-cek-machine-in-agda.md
Normal file
215
src/content/posts/2023-09-01-formal-cek-machine-in-agda.md
Normal file
|
@ -0,0 +1,215 @@
|
||||||
|
---
|
||||||
|
title: Building a formal CEK machine in agda
|
||||||
|
draft: true
|
||||||
|
date: 2023-09-01T13:53:23.974Z
|
||||||
|
tags: ["computer-science", "programming-languages", "formal-verification", "lambda-calculus"]
|
||||||
|
---
|
||||||
|
|
||||||
|
Back in 2022, I took a special topics course, CSCI 8980, on [reasoning about
|
||||||
|
programming languages using Agda][plfa], a dependently typed meta-language. For
|
||||||
|
the term project, we were to implement a simply-typed lambda calculus with
|
||||||
|
several extensions, along with proofs of certain properties.
|
||||||
|
|
||||||
|
[plfa]: https://plfa.github.io/
|
||||||
|
|
||||||
|
My lambda calculus implemented `call/cc` on top of a CEK machine.
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><b>Why is this interesting?</b></summary>
|
||||||
|
|
||||||
|
Reasoning about languages is one way of ensuring whole-program correctness.
|
||||||
|
Building up these languages from foundations grounded in logic helps us
|
||||||
|
achieve our goal with more rigor.
|
||||||
|
|
||||||
|
As an example, suppose I wrote a function that takes a list of numbers and
|
||||||
|
returns the maximum value. Mathematically speaking, this function would be
|
||||||
|
_non-total_; an input consisting of an empty set would not produce reasonable
|
||||||
|
output. If this were a library function I'd like to tell people who write code
|
||||||
|
that uses this function "don't give me an empty list!"
|
||||||
|
|
||||||
|
But just writing this in documentation isn't enough. What we'd really like is
|
||||||
|
for a tool (like a compiler) to tell any developer who is trying to pass an
|
||||||
|
empty list into our maximum function "You can't do that." Unfortunately, most
|
||||||
|
of the popular languages being used today have no way of describing "a list
|
||||||
|
that's not empty."
|
||||||
|
|
||||||
|
We still have a way to prevent people from running into this problem, though
|
||||||
|
it involves pushing the problem to runtime rather than compile time. The
|
||||||
|
maximum function could return an "optional" maximum. Some languages'
|
||||||
|
implementations of optional values force programmers to handle the "nothing"
|
||||||
|
case, while others ignore it silently. But in the more optimistic case, even
|
||||||
|
if the list was empty, the caller would have handled it and treated it
|
||||||
|
accordingly.
|
||||||
|
|
||||||
|
This isn't a pretty way to solve this problem. _Dependent types_ gives us
|
||||||
|
tools to solve this problem in an elegant way, by giving the type system the
|
||||||
|
ability to contain values. This also opens its own can of worms, but for
|
||||||
|
questions about program correctness, it is more valuable than depending on
|
||||||
|
catching problems at runtime.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Crash course on the lambda calculus
|
||||||
|
|
||||||
|
The [lambda calculus] is a mathematical abstraction for computation. The core
|
||||||
|
mechanism it uses is a concept called a _term_. Everything that can be
|
||||||
|
represented in a lambda calculus is some combination of terms. A term can have
|
||||||
|
several constructors:
|
||||||
|
|
||||||
|
[lambda calculus]: https://en.wikipedia.org/wiki/Lambda_calculus
|
||||||
|
|
||||||
|
- **Var.** This is just a variable, like `x` or `y`. During evaluation, a
|
||||||
|
variable can resolve to a value in the evaluation environment by name. If
|
||||||
|
the environment says `{ x = 5 }`, then evaluating `x` would result in 5.
|
||||||
|
|
||||||
|
- **Abstraction, or lambda (λ).** An _abstraction_ is a term that describes some
|
||||||
|
other computation. From an algebraic perspective, it can be thought of as a
|
||||||
|
function with a single argument (i.e f(x) = 2x is an abstraction, although
|
||||||
|
it would be written `(λx.2x)`)
|
||||||
|
|
||||||
|
- **Application.** Application is sort of the opposite of abstraction, exposing
|
||||||
|
the computation that was abstracted away. From an algebraic perspective,
|
||||||
|
this is just function application (i.e applying `f(x) = 2x` to 3 would
|
||||||
|
result in 2\*3. Note that only a simple substitution has been done and
|
||||||
|
further evaluation is required to reduce 2\*3)
|
||||||
|
|
||||||
|
### Why?
|
||||||
|
|
||||||
|
The reason it's set up this way is so we can reason about terms inductively.
|
||||||
|
Rather than having lots of syntax for making it easier for programmers to write
|
||||||
|
a for loop as opposed to a while loop, or constructing different kinds of
|
||||||
|
values, the lambda calculus focuses on function abstraction and calls, and
|
||||||
|
strips everything else away.
|
||||||
|
|
||||||
|
The idea is that because terms are just nested constructors, we can describe the
|
||||||
|
behavior of any term by just defining the behavior of these 3 constructors. The
|
||||||
|
flavorful features of other programming languages can be implemented on top of
|
||||||
|
the function call rules in ways that don't disrupt the basic function of the
|
||||||
|
evaluation.
|
||||||
|
|
||||||
|
In fact, the lambda calculus is Turing-complete, so any computation can
|
||||||
|
technically be reduced to those 3 constructs. I used numbers liberally in the
|
||||||
|
examples above, but in a lambda calculus without numbers, you could define
|
||||||
|
integers using a recursive strategy called [Church numerals]. It looks like this:
|
||||||
|
|
||||||
|
[church numerals]: https://en.wikipedia.org/wiki/Church_encoding
|
||||||
|
|
||||||
|
- Let `z` represent zero.
|
||||||
|
- Let `s` represent a "successor", or increment function. `s(z)` represents 1,
|
||||||
|
`s(s(z))` represents 2, and so on.
|
||||||
|
|
||||||
|
In lambda calculus terms, this would look like:
|
||||||
|
|
||||||
|
- 0 = `λs.(λz.z)`
|
||||||
|
- 1 = `λs.(λz.s(z))`
|
||||||
|
- 2 = `λs.(λz.s(s(z)))`
|
||||||
|
- 3 = `λs.(λz.s(s(s(z))))`
|
||||||
|
|
||||||
|
In practice, many lambda calculus incorporate higher level constructors such as
|
||||||
|
numbers or lists to make it so we can avoid having to represent them using only
|
||||||
|
a series of function calls. However, any time we add more syntax to a language,
|
||||||
|
we increase its complexity in proofs, so for now let's keep it simple.
|
||||||
|
|
||||||
|
### The Turing completeness curse
|
||||||
|
|
||||||
|
As I noted above, the lambda calculus is [_Turing-complete_][tc]. One feature of
|
||||||
|
Turing complete systems is that they have a (provably!) unsolvable "halting"
|
||||||
|
problem. Most of the simple terms shown above terminate predictably. But as an
|
||||||
|
example of a term that doesn't halt, consider the _Y combinator_, an example of
|
||||||
|
a fixed-point combinator:
|
||||||
|
|
||||||
|
[tc]: https://en.wikipedia.org/wiki/Turing_completeness
|
||||||
|
|
||||||
|
Y = λf.(λx.f(x(x)))(λx.f(x(x)))
|
||||||
|
|
||||||
|
That's quite a mouthful. If you tried calling Y on some term, you will find that
|
||||||
|
evaluation will quickly expand infinitely. That makes sense given its purpose:
|
||||||
|
to find a _fixed point_ of whatever function you pass in.
|
||||||
|
|
||||||
|
> As an example, the fixed-point of the function f(x) = sqrt(x) is 1. That's
|
||||||
|
> because f(1) = 1. The Y combinator attempts to find the fixed point by simply
|
||||||
|
> applying the function multiple times. In the untyped lambda calculus, this can
|
||||||
|
> be used to implement simple (but possibly unbounded) recursion.
|
||||||
|
|
||||||
|
This actually proves disastrous for trying to reason about the logic of a
|
||||||
|
program. If we don't even know for sure if something will halt, how can we know
|
||||||
|
that it'll produce the correct value? In fact, you can prove false statements
|
||||||
|
using infinite recursion as a basis.
|
||||||
|
|
||||||
|
This is why we actually prefer _not_ to work with Turing-complete languages when
|
||||||
|
doing logical reasoning on program evaluation. Instead, we always want to add
|
||||||
|
some constraints on it to make evaluation total, ensuring that we have perfect
|
||||||
|
information about our program's behavior.
|
||||||
|
|
||||||
|
### Simply-typed lambda calculus
|
||||||
|
|
||||||
|
The simply-typed lambda calculus (STLC) adds types to every term. Types are
|
||||||
|
crucial to any kind of static program analysis. Suppose I was trying to apply
|
||||||
|
the term 5 to 6 (in other words, call 5 with the argument 6 as if 5 was a
|
||||||
|
function). As humans we can look at that and instantly recognize that the
|
||||||
|
evaluation would be invalid, yet under the untyped lambda calculus, it would be
|
||||||
|
completely representable.
|
||||||
|
|
||||||
|
To solve this in STLC, we would make this term completely unrepresentable at
|
||||||
|
all. To say you want to apply 5 to 6 would not be a legal STLC term. We do this
|
||||||
|
by requiring that all STLC terms are untyped lambda calculus terms accompanied
|
||||||
|
by a _type_.
|
||||||
|
|
||||||
|
This gives us more information about what's allowed before we run the
|
||||||
|
evaluation. For example, numbers may have their own type `Nat` (for "natural
|
||||||
|
number"), while functions have a special "arrow" type `_ -> _`, where the
|
||||||
|
underscores represent other types. A function that takes a number and returns a
|
||||||
|
boolean (like isEven) would have the type `Nat -> Bool`, while a function that
|
||||||
|
takes a boolean and returns another boolean would be `Bool -> Bool`.
|
||||||
|
|
||||||
|
With this, we have a framework for rejecting terms that would otherwise be legal
|
||||||
|
in untyped lambda calculus, but would break when we tried to evaluate them. A
|
||||||
|
function application would be able to require that the argument is the same type
|
||||||
|
as what the function is expecting.
|
||||||
|
|
||||||
|
The nice property you get now is that all valid STLC programs will never get
|
||||||
|
_stuck_, which is being unable to evaluate due to some kind of error. Each term
|
||||||
|
will either be able to be evaluated to a next state, or is done.
|
||||||
|
|
||||||
|
A semi-formal definition for STLC terms would look something like this:
|
||||||
|
|
||||||
|
- **Var.** Same as before, it's a variable that can be looked up in the
|
||||||
|
environment.
|
||||||
|
|
||||||
|
- **Abstraction, or lambda (λ).** This is a function that carries three pieces
|
||||||
|
of information: (1) the name of the variable that its input will be substituted
|
||||||
|
for, (2) the _type_ of the input, and (3) the body in which the substitution
|
||||||
|
will happen.
|
||||||
|
|
||||||
|
- **Application.** Same as before.
|
||||||
|
|
||||||
|
It doesn't really seem like changing just one term changes the language all that
|
||||||
|
much. But as a result of this tiny change, _every_ term now has a type:
|
||||||
|
|
||||||
|
- `5 :: Nat`
|
||||||
|
- `λ(x:Nat).2x :: Nat -> Nat`
|
||||||
|
- `isEven(3) :: (Nat -> Bool) · Nat = Bool`
|
||||||
|
|
||||||
|
Notation: (`x :: T` means `x` has type `T`, and `f · x` means `f` applied to
|
||||||
|
`x`)
|
||||||
|
|
||||||
|
This also means that some values are now unrepresentable:
|
||||||
|
|
||||||
|
- `isEven(λx.2x) :: (Nat -> Bool) · (Nat -> Nat)` doesn't work because the type
|
||||||
|
of `λx.2x :: Nat -> Nat` can't be used as an input for `isEven`, which is
|
||||||
|
expecting a `Nat`.
|
||||||
|
|
||||||
|
We have a good foundation for writing programs now, but this by itself can't
|
||||||
|
qualify as a system for computation. We need an abstract machine of sorts that
|
||||||
|
can evaluate these symbols and actually compute on them.
|
||||||
|
|
||||||
|
In practice, there's a number of different possible abstract machines that can
|
||||||
|
evaluate the lambda calculus. Besides the basic direct implementation, alternate
|
||||||
|
implementations such as [interaction nets] have become popular due to being able
|
||||||
|
to be parallelized efficiently.
|
||||||
|
|
||||||
|
[interaction nets]: https://en.wikipedia.org/wiki/Interaction_nets
|
||||||
|
|
||||||
|
## CEK machine
|
||||||
|
|
||||||
|
A CEK machine is responsible for evaluating a lambda calculus term.
|
12
src/content/posts/2023-09-01-hindley-milner/index.md
Normal file
12
src/content/posts/2023-09-01-hindley-milner/index.md
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
---
|
||||||
|
title: Hindley-Milner type inference
|
||||||
|
date: 2023-09-01T13:06:55.614Z
|
||||||
|
tags:
|
||||||
|
- type-theory
|
||||||
|
draft: true
|
||||||
|
---
|
||||||
|
|
||||||
|
Today, a lot of languages have a feature called something along the lines of
|
||||||
|
"type inference". The idea is that through the way variables are passed and
|
||||||
|
used, you can make guesses about what type it should be, and preemptively point
|
||||||
|
out invalid or incompatible uses. Let's talk about how it works.
|
|
@ -47,6 +47,26 @@ const datestamp = post.data.date.toLocaleDateString(undefined, {
|
||||||
<div class="post-container">
|
<div class="post-container">
|
||||||
<h1 class="post-title">{post.data.title}</h1>
|
<h1 class="post-title">{post.data.title}</h1>
|
||||||
|
|
||||||
|
<span class="tags">
|
||||||
|
{
|
||||||
|
post.data.draft && (
|
||||||
|
<a href="/drafts" class="tag draft">
|
||||||
|
<i class="fa fa-warning" aria-hidden="true" />
|
||||||
|
<span class="text">draft</span>
|
||||||
|
</a>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
post.data.tags.map((tag) => (
|
||||||
|
<a href={`/tags/${tag}`} class="tag">
|
||||||
|
<i class="fa fa-tag" aria-hidden="true" />
|
||||||
|
<span class="text">{tag}</span>
|
||||||
|
</a>
|
||||||
|
))
|
||||||
|
}
|
||||||
|
</span>
|
||||||
|
|
||||||
<small class="post-meta">
|
<small class="post-meta">
|
||||||
Posted on <Timestamp timestamp={post.data.date} />
|
Posted on <Timestamp timestamp={post.data.date} />
|
||||||
- {remarkPluginFrontmatter.minutesRead}
|
- {remarkPluginFrontmatter.minutesRead}
|
||||||
|
|
28
src/pages/tags/[tag].astro
Normal file
28
src/pages/tags/[tag].astro
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
---
|
||||||
|
import { join, dirname } from "path";
|
||||||
|
import BaseLayout from "../../layouts/BaseLayout.astro";
|
||||||
|
import PostList from "../../components/PostList.astro";
|
||||||
|
import { getCollection } from "astro:content";
|
||||||
|
|
||||||
|
export async function getStaticPaths() {
|
||||||
|
const posts = await getCollection("posts");
|
||||||
|
return posts.flatMap((post) =>
|
||||||
|
post.data.tags.map((tag) => ({
|
||||||
|
params: { tag },
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { tag } = Astro.params;
|
||||||
|
const posts = await getCollection("posts");
|
||||||
|
const taggedPosts = posts.filter((post) => post.data.tags.includes(tag));
|
||||||
|
|
||||||
|
const currentUrl = Astro.url;
|
||||||
|
const root = dirname(dirname(currentUrl.pathname));
|
||||||
|
---
|
||||||
|
|
||||||
|
<BaseLayout>
|
||||||
|
<p>Posts tagged with {tag}</p>
|
||||||
|
|
||||||
|
<PostList basePath={join(root, "posts")} filteredPosts={taggedPosts} />
|
||||||
|
</BaseLayout>
|
|
@ -82,6 +82,7 @@
|
||||||
font-size: 0.9rem;
|
font-size: 0.9rem;
|
||||||
padding: 0 30px;
|
padding: 0 30px;
|
||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
|
position: relative;
|
||||||
|
|
||||||
p:nth-of-type(1) {
|
p:nth-of-type(1) {
|
||||||
padding-top: 0;
|
padding-top: 0;
|
||||||
|
@ -89,8 +90,13 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
summary {
|
summary {
|
||||||
|
cursor: pointer;
|
||||||
padding: 10px 0;
|
padding: 10px 0;
|
||||||
transition: margin 150ms ease-out;
|
transition: margin 150ms ease-out;
|
||||||
|
|
||||||
|
position: sticky;
|
||||||
|
top: 0;
|
||||||
|
background-color: var(--background-color);
|
||||||
}
|
}
|
||||||
|
|
||||||
&[open] summary {
|
&[open] summary {
|
||||||
|
@ -196,3 +202,29 @@ hr.endline {
|
||||||
border-width: 1px 0 0 0;
|
border-width: 1px 0 0 0;
|
||||||
border-color: var(--hr-color);
|
border-color: var(--hr-color);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.tags {
|
||||||
|
display: flex;
|
||||||
|
gap: 6px;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
|
||||||
|
.tag {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
background-color: var(--tag-color);
|
||||||
|
padding: 2px 7px;
|
||||||
|
border-radius: 4px;
|
||||||
|
|
||||||
|
&.draft {
|
||||||
|
background-color: orange;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
&:hover {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue