Compare commits

..

1 commit

Author: Michael Zhang
SHA1: c467d9fd67
Message: eleventy
Checks: Some checks failed (ci/woodpecker/push/woodpecker: Pipeline failed)
Date: 2023-05-07 23:10:26 -05:00
261 changed files with 3262 additions and 15315 deletions


@ -1,28 +0,0 @@
image: alpine/edge
oauth: pages.sr.ht/PAGES:RW
packages:
- hut
- npm
- rsync
- typst
environment:
site: mzhang.io
secrets:
- 0b26b413-7901-41c3-a4e2-3c752228ffcb
sources:
- https://git.sr.ht/~mzhang/blog
tasks:
- install: |
sudo npm install -g pnpm
- build: |
cd blog
pnpm install
pnpm run build
# hugo --buildDrafts --minify --baseURL https://mzhang.io
- upload: |
cd blog/dist
tar -cvz . > ../site.tar.gz
cd ..
hut pages publish -d $site site.tar.gz
# echo "mzhang.io ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBzBZ+QmM4EO3Fwc1ZcvWV2IY9VF04T0H9brorGj9Udp" >> ~/.ssh/known_hosts
# rsync -azvrP dist/ sourcehutBuilds@mzhang.io:/mnt/storage/svcdata/blog-public


@ -1 +0,0 @@
*


@ -6,4 +6,4 @@ insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 2
indent_size = 4

2
.gitattributes vendored

@ -1,2 +0,0 @@
public/katex/**/* linguist-vendored
src/**/*.md linguist-documentation=false

38
.gitignore vendored

@ -1,32 +1,14 @@
# build output
dist/
# generated types
.astro/
/public
/old
/resources
/content/logseq
# dependencies
node_modules/
# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# environment variables
.env
.env.production
# macOS-specific files
.DS_Store
PragmataPro-Mono-Liga-Regular-Nerd-Font-Complete.woff2
*.agdai
_build
.hugo_build.lock
.direnv
*.agdai
/result*
/result
.pnpm-store
/public/generated
/static/fonts/patched/PragmataPro-Mono-Liga-Regular-Nerd-Font-Complete.woff2
node_modules
_site


@ -1,3 +0,0 @@
public/katex
pnpm-lock.yaml
src/styles/fork-awesome


@ -1,17 +0,0 @@
{
"plugins": ["prettier-plugin-astro"],
"overrides": [
{
"files": "*.astro",
"options": {
"parser": "astro"
}
}
],
"useTabs": false,
"tabWidth": 2,
"singleQuote": false,
"trailingComma": "all",
"printWidth": 100
}

8
.prettierrc.json5 Normal file

@ -0,0 +1,8 @@
{
tabWidth: 2,
semi: true,
useTabs: false,
singleQuote: false,
trailingComma: "es5",
printWidth: 80,
}


@ -1,3 +0,0 @@
public
package-lock.json
src/styles/fork-awesome


@ -1,4 +0,0 @@
{
"recommendations": ["astro-build.astro-vscode"],
"unwantedRecommendations": []
}

11
.vscode/launch.json vendored

@ -1,11 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"command": "./node_modules/.bin/astro dev",
"name": "Development server",
"request": "launch",
"type": "node-terminal"
}
]
}


@ -1,26 +1,18 @@
steps:
pipeline:
build:
image: git.mzhang.io/michael/blog-docker-builder:x0m1c7r2qzc3qbyw8sfv5flfv75xiqdz
environment:
- ASTRO_TELEMETRY_DISABLED=1
image: klakegg/hugo:ext-pandoc-ci
commands:
- mkdir /tmp
- rm -rf node_modules
- npm i -g pnpm@9.4.0
- npx pnpm install --frozen-lockfile
- npx pnpm run build
when:
- event: push
- hugo --buildDrafts --minify --baseURL https://mzhang.io
deploy:
image: git.mzhang.io/michael/blog-docker-builder:x0m1c7r2qzc3qbyw8sfv5flfv75xiqdz
image: alpine
commands:
- apk add rsync openssh
- echo "$${SSH_SECRET_KEY}" > SSH_SECRET_KEY
- chmod 600 SSH_SECRET_KEY
- mkdir -p ~/.ssh
- echo "mzhang.io ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO+RKFYjD8UjqhfOHFHNyijkbGzC4fJEIIhrBwHj+FsQ" >> known_hosts
- rsync -azrP -e "ssh -i SSH_SECRET_KEY -o UserKnownHostsFile=known_hosts" dist/ blogDeploy@mzhang.io:/home/blogDeploy/public
secrets: [SSH_SECRET_KEY]
- echo "mzhang.io ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBzBZ+QmM4EO3Fwc1ZcvWV2IY9VF04T0H9brorGj9Udp" >> ~/.ssh/known_hosts
- rsync -azvrP -e "ssh -i SSH_SECRET_KEY" public/ sourcehutBuilds@mzhang.io:/mnt/storage/svcdata/blog-public
secrets: [ SSH_SECRET_KEY ]
when:
- branch: master
event: push
branch: master


@ -1,5 +0,0 @@
# Michael's Blog
https://mzhang.io
License: GPL-3.0 / CC-BY-SA-4.0


@ -1,56 +0,0 @@
import { defineConfig } from "astro/config";
import mdx from "@astrojs/mdx";
import sitemap from "@astrojs/sitemap";
import { rehypeAccessibleEmojis } from "rehype-accessible-emojis";
import remarkReadingTime from "./plugin/remark-reading-time";
import remarkEmoji from "remark-emoji";
import remarkDescription from "astro-remark-description";
import remarkAdmonitions, { mkdocsConfig } from "./plugin/remark-admonitions";
import remarkMath from "remark-math";
import rehypeKatex from "rehype-katex";
import remarkTypst from "./plugin/remark-typst";
import rehypeLinkHeadings from "@justfork/rehype-autolink-headings";
import rehypeSlug from "rehype-slug";
import markdoc from "@astrojs/markdoc";
import remarkAgda from "./plugin/remark-agda";
const outDir = "dist";
const base = process.env.BASE ?? "/";
const publicDir = "public";
// https://astro.build/config
export default defineConfig({
site: "https://mzhang.io",
integrations: [mdx(), sitemap(), markdoc()],
outDir,
base,
trailingSlash: "always",
publicDir,
markdown: {
syntaxHighlight: "shiki",
shikiConfig: { theme: "css-variables" },
remarkPlugins: [
() => remarkAgda({ outDir, base, publicDir }),
remarkMath,
[remarkAdmonitions, mkdocsConfig],
remarkReadingTime,
remarkTypst,
remarkEmoji,
[
remarkDescription,
{
name: "excerpt",
},
],
],
rehypePlugins: [
[rehypeKatex, {}],
rehypeAccessibleEmojis,
rehypeSlug,
[rehypeLinkHeadings, { behavior: "wrap" }],
],
},
});


@ -1,17 +0,0 @@
{
"$schema": "https://biomejs.dev/schemas/1.7.3/schema.json",
"organizeImports": {
"enabled": true
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
}
}


@ -1,2 +0,0 @@
include: src/content/posts src
depend: standard-library cubical

BIN
bun.lockb

Binary file not shown.


@ -1,15 +1,17 @@
---
title: "About"
layout: ../layouts/BaseLayout.astro
---
+++
title = "About"
weight = 2
[cascade]
type = "generic"
layout = "single"
+++
# About Me
import Intro from "../content/partials/shortBio.md";
{{< left-nav >}}
Hi there! :wave:
<Intro />
<!-- more -->
### Research
@ -18,23 +20,23 @@ write some blog posts as I learn more. My advisor is [Favonia].
### University Involvement
I also love to participate in computing related student groups at the University
of Minnesota. Here's some of the groups that I'm involved with:
During my time at the University of Minnesota, I like to be actively involved in
computing related student groups.
- **[GopherHack]**.
I'm one of the founding officers at the GopherHack organization, hoping to grow a CTF community at the University.
I prepare material for club activities.
- **[PL Seminar]**.
A group focused on reading and discussing programming languages related papers.
- **[UMN Kernel Object]**.
A group dedicated to studying operating system development, created in the aftermath of the UMN Linux kernel controversy.
- **[ACM]**.
I was webmaster in undergrad and wrote the current ACM website, as well as helping out with other events such as CTF.
- **[GopherHack]**. I'm one of the founding officers at the GopherHack
organization, hoping to grow a CTF community at the University. I prepare
material for club activities.
- **[PL Seminar]**. A group focused on reading and discussing programming
languages related papers.
- **[UMN Kernel Object]**. A group dedicated to studying operating system
development, created in the aftermath of the UMN Linux kernel controversy.
Previously, I was also involved with:
- **[SASE]**.
I was webmaster and was involved in organizing student group events as well.
- **[ACM]**. I was webmaster and wrote the current website, as well as helping
out with other events such as CTF.
- **[SASE]**. I was webmaster and was involved in organizing student group
events as well.
### Open-source Projects
@ -44,21 +46,25 @@ Some of the projects I've been working on in my free time include:
- **[Garbage]**. A CLI interface to the trash can API.
- **[Leanshot]**. A Linux screen capture tool.
More can be found on my public [Forgejo][2] instance.
More can be found on [this page][12] or my public [Gitea][2].
I've also started making an increased effort at using and supporting [FOSS], and
other software that're not predatory towards users. As a part of this effort,
I'm also self-hosting and rewriting some of the services and software that I use
regularly.
regularly. Find out what I'm using [here][9].
### Hobbies
I love cooking, listening to music, chess, bouldering, and beer.
If you're in the Minneapolis area, let's chat!
I'm also an avid rhythm game player and beatmap creator, mostly involved with
the free-to-play game [osu!]. Check out some of my beatmaps on my osu!
[userpage].
I also enjoy playing badminton &#x1F3F8; at the rec.
[2]: https://git.mzhang.io/explore
[9]: setup
[10]: pgp.txt
[12]: ../projects
[cubical]: https://ncatlab.org/nlab/show/cubical+type+theory
[favonia]: https://favonia.org
[foss]: https://en.wikipedia.org/wiki/Free_and_open-source_software
@ -72,4 +78,3 @@ If you're in the Minneapolis area, let's chat!
[acm]: https://acm.umn.edu
[sase]: https://saseumn.org
[umn kernel object]: https://github.com/UMN-Kernel-Object
[aws]: https://aws.amazon.com

78
content/about/pgp.txt Normal file

@ -0,0 +1,78 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mQINBF4Zd7ABEADGHbFm8V6IPZLt7ZFvS0dXMcjpKgWSyeoLGTZYtZ+tLTgAE2Uc
FCzpNvo7eIu4kZ1bTsLbmVVXaVz0OKnHz+XfgrLnT2uGsABYfv3rwHd/Ia3Oe4Sc
ZNVI2UPmi/aLQUZGZrTBRflJd8xs4Ckl8OMOh3qMm0M0uHvYH3kb8gAeLMQZlyFi
P+65NsxNigXlgvtJzkK1vsqUOytYIJsw639f7Kf4W8QZfHTZ3WOOV/UC7Tuq1s0a
YgcftlQ1b9O5q01rJvVTNHHGv7i/9nhUMFVuUkOw3aAV3gn/e/THX+BWy0HJ/qtB
MGfeW8U2UKPqjJzxDIlaDXlPoO4rhrk0yL+CKKCCQNx87Z2PvObRz7u36gU0fgFk
m7sniexroI6lDA5TdWYEcPMlwfMx2zXoFBgx5Sa1pzMw/XJtqG1DFAUsSuc6g3Ty
5qOH7uSLFdoO3AflSCXMuS77l7595mptW3zkKcvFKre+u9ZejDePZdsjxnWovWC/
yCenB7dYofpJ1/RPPGdl0AuTouPmb56/u2FJ3yukJXXSBpR3iXwK+u4962vSJd3t
QUtqAHo23bvvRj0ZSn8ujIcXNVO3QPXHxnSQ86mnwh3CPL6JV+zTwesiVYvxBTHJ
kzPMWmpYzsJqW5KNOQA09arNvxyRcgIBpKBKZv34dANYvpzWKEQ0icIH8wARAQAB
tCNNaWNoYWVsIFpoYW5nIDxpcHRxQHByb3Rvbm1haWwuY29tPokCVAQTAQgAPhYh
BJJezAKJDVza4mGA1L2kejGjyO5rBQJeGXewAhsDBQkDwmcABQsJCAcCBhUKCQgL
AgQWAgMBAh4BAheAAAoJEL2kejGjyO5rXfgP/2KwYW5d0oEk9m6ZpORf6KkRlV2y
tEhiYIjAAuyQDfsqUtgfNMbTlK7MGuVhjUTZY6BP+CRO5xPUyGFBYOSOjgtwe/jU
QNFAhyUwBmWOXxezX1ZL2p5FJDQkiJFOzsCKO76+awalmp9t9pMUDhx8fkM1QcMM
Czp4QmeUQ+cZQnGQI8mRBMzX7adEUDusMQTtZLbE+mBdP5Uxu16CsR0JalF1OQOu
fLJx80ywQ0t4448EQJQ4PgLcYRCLN9m3fvMRIZy6BwfCAge7l4EvN7UtljJwQmlA
vTbdV6QhaaVeRtHUP6ZhZYxVgKxYaVsm0tDV89pkFR0xB+wmrZKMWMcQZ5X1XwC+
4Ubdmo7kcfcb7D/Sd4C/GkGDP9Jobn0E0YfBemuWJioC9wt71xREsSAWFNAZ3PzE
XTF6A5sEHXnxzC3r+ei/ivHhjbMJDqVCXmVqUufS7Cf06lqSyOhEpB6I4sn83sSw
l/RdqzoDsG/FDudxtgO99r3g405cqR9hVZCFleg1stHs7mDaFnlFaHnJTSqlIU4x
F6XD4niJa2Ti5xY9tHCqnBYXhlQfAwEwIqONGrrxcl/+YWlRANqEEYW7v28+bZvj
8Pm+QzW4hjCQ7xHTMeXiMu6yd8RrldPfhb79n4Wsn9dH7cBkMLqjILC+zxqhKXfN
ew6sqp0m+bYRYXBOtB5NaWNoYWVsIFpoYW5nIDxtYWlsQG16aGFuZy5pbz6JAlcE
EwEKAEECGwMFCQPCZwAFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AWIQSSXswCiQ1c
2uJhgNS9pHoxo8juawUCX5hUgQIZAQAKCRC9pHoxo8jua6oID/9ktY4qnpVLMjiP
hfXrFHml/G7jGAPI/8We07VFEC9f4qKCtwiZIesyltDRiuW+Bdokcoz2oykI3dqL
MBJLLrvNw409MW34CkhuKxJEKifgNGFja7zljWSOZdVERvMifWh+w/B5eqjWR0iy
HTpAjzVtYHGMKVK9sBgVPnnba5b19vjmEQKLpiVvhxCZejmUgvICm1Q664540LWz
xFSgJmz6gONanea6URxyvF3yeT2UY7utDKd7LELd0caxNMJMkqgBrEwak2ykS4OZ
SD1r/kC6pwRTjEHL5uz0nt/IlcUCKJwNplSSyTewfx8wESGzHdEURS6je0OdkKkn
7t6JH7XLMYwLKW/TAVBQV19IOdvEr/cNWGmpcW/K/U11vFVxYB0v9URWEwqoWsx3
K/Uz9Kp9i8dqnMy4IfFx6AUMj6aQp9NyrkNB/Oc/C61PCG1/Fq4VHLgy3OLGIOum
tM3XPUs7BOvg3MCyVYu0coCinss6RLOUKBAuuEc9J+GB1o0m3bcUGF/XmJpopMpL
S+HlPtlo29FSmGWQdsyO5Ms33WUHYJxt3bA9bnCEwvPojUzY39YjQ3Pvg3VqCzI0
dhQs9lKUAr+ias7ydzpcTx3tnRsTtmCUV8AlQYE38fgQU8/Y6CZgXn/l32nv4pyi
0Vdu3wbRSI+QXV6xaSVvtyDgvT09xrQfTWljaGFlbCBaaGFuZyA8bXpoYW5nQHNp
ZnQubmV0PokCVAQTAQoAPhYhBJJezAKJDVza4mGA1L2kejGjyO5rBQJf/LV9AhsD
BQkDwmcABQsJCAcDBRUKCQgLBRYCAwEAAh4BAheAAAoJEL2kejGjyO5rKl0P/irR
bZhZ6kvAUAFot0eBgOJrGTD+3+8n8VnVHb1AGn6Pq7UqObhrR5gT9zf9A0t+x14L
C8y+e/OI+/hZWdj2z0n79D3X9ZeHBrU00S1gyBoluc+eKN8yW/A8KfwC/IX5c1ov
nIz/pP5056Dr9nlvZ99gSg3yR73WYodtH7vNQ9QOsiC99dgMEA04KOjSoDeCkWSv
+6WwFSjDsOz+SyyrU7uQQSbad00aZwArHQ1iRbqaV+/eESY7aI4TbXSVEDZ36QJg
LvpAO/pkz3nlKE6BRGl8SFjj3Lr5jYf/gH7A7BiYi1F7WnWHTaIIL3uxxj4U5eZB
Xd1YM8weAMeU9xT7teyPj35vfZtRebmquXkzeBLIzfjCCUEr2XAgIGajsYcI8fl9
nIGahng1fs1fGah2J3vR3QM9mL2SaQaknU2lEum9XUDiQuIr6GdjfeH0xyfqIEGG
2S2uiZvTYKkQxCt9cdaeclfJWcQnF+voveRPzzKAIw6q2V2JMaFkd9uTuBkGY+Ar
q6z6C9McHxzyoGF4/Nlbmrw2dKgD4lY5Hr92WiwwDnqlBhYPEWR2gSidYqXtRgPW
ICLt1DrBSzMFqNr3hAcXVjOiFklozpDL1RrUhra5azuscirzM9aIHuz85SzLzpCC
WSvyhSyknOTOp6dmQrt7mjqDPHw11mDYcBMtFmsQuQINBF4Zd7ABEAC5XlB/S3z+
WErLjskIkEU4+WmuRZh3eHQCgN//fxondOjODHNUIBydAY+7A4PTfvPwvhRKMIAT
kVCcRtaT3biqL8PZA7ov2Ul3naGsi42nNc4MHi76mZ82LUtuhdRIKIBMsJrR22pW
/LLgD4tLqfiaKwzijULFkdaPBdElP4S+dbLb7m3CaugwXcJqHzHTS/iGZyC4uD29
6NlnammB/vANLhWtGGWobnaieFfFaUNoNrT0T6z1zY4TFIRKshT5HixIhn8qFJuM
cyyjVeimNWekSLyo1UKwxLVkAhu1jAnd8C6g0X3vFbhthCLQqBSlxGtLjJRVRAU2
WjiXZsleJXsOBuhDj8GnanPZwVNBgjwNPIKY5X17WAFm324SQD6Hyc1qUMvGoKo5
OmLulBBX6Z9uwSLCe5q9clEwcqqf0KCOQasNkDlUTw34T2Xc47yycc/2UgVnfrXF
AteFdjmnuvQRRc8J5Gk/R+sclmQNE9v4TPr4gdwiaz2xpclYyy5NhP+4u1G6ovzH
/jePm2a3ZyejPEzy1JXZp5HwzgTL7WcPq+NHAgNOJ3wn7dxI8MZogzjEdGdQDfe1
ckh4BKl9PlZ+q9IZvECAm3HVJ5u4N8sF3KmmCQwCPPhoueZF6GzoMnG4fbs1N1jy
O0K1nfwnFjwz8E9hiWAyEpEZPjndJzs37QARAQABiQI8BBgBCAAmFiEEkl7MAokN
XNriYYDUvaR6MaPI7msFAl4Zd7ACGwwFCQPCZwAACgkQvaR6MaPI7mvAIQ/+Mtql
UWC72vXOjoRJ4BQy3B5pj9vgikgH4ePD3FFpgQzd/aWMgsyVhbV75GNOnk5nOHMD
X0bIVB7sBF8rAtybZEdxc8TS4xmR3wMCFC79TDwf8JTkBNJfYwBZfRzvHjXe0wlE
TTyCtdg6/nAzmLUcIonVlS2PUSeL+ueTge2b/4lRMOm7KXoPxChcBDQz+IgD38kt
NnOF+mWAEwIwQzaav4CqEHnB1N+S2exr6THoQmvhE6QK9aUsDmG8vsr210uJDu8r
d453NMYHONKgkL9IUJpgSWPu/OtEiiRi5mKTWEMeeNYtDJFdk+hszKsp+UzKHgbz
8/+mbOdqgbcozdQohEfTiy0lsCg/ZW+H9z/NRLHm7ldZqskNvRxlfRC7mVqh8yHd
EVFeFBXEnlU2RkXvxlxCZaEfsfogFLDlFmQXezMvSP120kWA7dcHQ8GMGJgDHDCj
cqyzI+0DYd2+Gpj9k6DJvXXb7zsirNY3ffeqTn3D5gcJ2KtSFEr3GUaINgO6dGej
PK1cmy3B5pp0Q2A4uuIetKZfE0O3oFrqJkqwM1Hn4BiRl5aMb9HiuGfg0G495ojo
Shnpdft7CEg0kU/RUfLjAxQs4va53Ma86FFG1FMxDpdaPCQcJma2C2Wr5p/2j3d3
gGtrxbJh/Mhq51seECOEBVQIrF8gnKYMrOkOv10=
=8UOD
-----END PGP PUBLIC KEY BLOCK-----


@ -0,0 +1,12 @@
+++
title = "random scripts"
+++
### convert a bunch of `flac`s to `mp3`
```bash
#!/bin/bash
function flac2mp3() { ffmpeg -y -i "$1" -acodec libmp3lame "$(basename "$1")".mp3; }
export -f flac2mp3 # only works in bash
fd "\.flac$" | parallel flac2mp3
```

48
content/about/setup.md Normal file

@ -0,0 +1,48 @@
+++
title = "Setup"
+++
# Setup
List of software and services I use and endorse, mostly FOSS.
## Desktop
- [**Arch Linux**](https://archlinux.org/) OS with rolling releases.
- [**Home manager** (MIT)](https://github.com/nix-community/home-manager) Dotfile manager.
- [**Firefox** (MPL-2.0)](https://www.mozilla.org/firefox) Browser.
- [**Thunderbird** (MPL-2.0)](https://www.thunderbird.net) Email + calendar client.
## Development
- [**Neovim** (Apache-2.0/Vim)](https://neovim.io/) Text editor.
## Server
- [**NixOS** (MIT)](https://nixos.org/) Declarative and reproducible operating system.
- [**Hugo** (Apache-2.0)](https://gohugo.io/) Static site generator that powers this site.
- [**Gitea** (MIT)](https://gitea.io/) Self-hosted git.
## Mobile
- [**DAVx5** (GPL-3.0)](https://www.davx5.com/) CalDAV and CardDAV sync for Android.
- [**Gadgetbridge** (AGPL-3.0)](https://gadgetbridge.org/) Smartwatch client.
- [**K-9 Mail** (Apache-2.0)](https://k9mail.app/) Mail client.
- [**Feeder** (GPL-3.0)](https://f-droid.org/packages/com.nononsenseapps.feeder/) RSS aggregator.
## Music
- [**Navidrome** (GPL-3.0)](https://navidrome.com) Self-hosted Subsonic-compatible streaming server.
- [**Sublime Music** (GPL-3.0)](https://sublimemusic.app) GTK Subsonic-compatible music client.
- [**Subtracks** (GPL-3.0)](https://github.com/austinried/subtracks) Android Subsonic-compatible music client.
## Services
- [**SourceHut** (AGPL-3.0)](https://sourcehut.org) Git, mailing list, IRC bouncer, etc. hosting.
- [**Element**](https://element.io/) Federated chat provider.
- [**ProtonMail** (Proprietary)](https://protonmail.com/) Encrypted email.
- [**Signal** (GPL-3.0/AGPL-3.0)](https://signal.org/) Encrypted chat.
## Games
Mostly from Steam.

9
content/drafts/_index.md Normal file

@ -0,0 +1,9 @@
+++
title = "Drafts"
weight = 1
hidden = true
[cascade]
type = "drafts"
+++

4
content/index.md Normal file

@ -0,0 +1,4 @@
---
---
Blog

3
content/main.scss Normal file

@ -0,0 +1,3 @@
body {
background-color: gray;
}


@ -1,8 +1,8 @@
---
title: My new life stack
date: 2018-02-01
tags: ["arch", "linux", "setup", "computers"]
---
+++
title = "My new life stack"
date = 2018-02-01
tags = ["arch", "linux", "setup", "computers"]
+++
This is my first post on my new blog! <!--more--> I used to put a CTF challenge writeup here but decided to change it up a bit. Recently, I've been changing a lot of the technology that I use day to day. Here's some of the changes that I've made!
@ -10,7 +10,7 @@ This is my first post on my new blog! <!--more--> I used to put a CTF challenge
I ran regular Ubuntu on my laptop for a while, then switched to Elementary OS, which I found a lot more pleasing to use. After using Elementary OS for about 6 months, some of the software on my computer started behaving strangely, and I decided it was time for some change.
`````
```
# michael @ arch in ~ [3:20:09]
$ screenfetch
-`
@ -33,7 +33,7 @@ $ screenfetch
`++:. `-/+/
.` `/
`````
```
I installed Arch Linux on my laptop the day before yesterday. I've used Arch Linux before, about a year ago, so setup was relatively familiar. On top of Arch Linux, I'm using the very widely recommended i3 tiling window manager, and urxvt terminal emulator.


@ -1,9 +1,9 @@
---
title: "Cleaning up your shell"
date: 2018-02-25
tags: ["computers", "linux", "terminal"]
languages: ["bash"]
---
+++
title = "Cleaning up your shell"
date = 2018-02-25
tags = ["computers", "linux", "terminal"]
languages = ["bash"]
+++
Is your shell loading slower than it used to? Maybe you've been sticking a bit more into your `.bashrc`/`.zshrc` than you thought. <!--more-->


@ -1,8 +1,8 @@
---
title: "Fixing tmux colors"
date: 2018-04-23
tags: ["computers"]
---
+++
title = "Fixing tmux colors"
date = 2018-04-23
tags = ["computers"]
+++
Put this in your `~/.tmux.conf`.


@ -1,9 +1,9 @@
---
title: "Web apps"
date: 2018-05-28
tags: ["computers", "web", "things-that-are-bad"]
languages: ["javascript"]
---
+++
title = "Web apps"
date = 2018-05-28
tags = ["computers", "web", "things-that-are-bad"]
languages = ["javascript"]
+++
The other day, I just turned off JavaScript from my browser. <!--more--> "fucking neckbeard", "you'll turn it back in 2 weeks", "living without JavaScript is like living without electricity" were some of the responses I got. And they might be right. But let's see why things are the way they are and what we can do about it.
@ -13,13 +13,13 @@ Well, the answer's pretty obvious, right? So you can surf it. But what does that
If you wanted to do anything more complicated or that required more interaction, like sending an email, you'd probably pop open a dedicated client to do it. Things like Microsoft Outlook, Mozilla Thunderbird serve as great email clients. For chat, you could use an IRC client. Hell, even the browser was a client, just for viewing webpages. If you didn't have a client for a service that you wanted to use, you'd download a client, enter in the details of the server you want to connect to, and then you would be off.
Things aren't that way anymore. For some reason, the web browser has become the all-in-one client for every service. Instead of simply acting as a HTTP client, your browser is now also capable of running full-blown 3D games, chat rooms, real-time word processors, and [full x86 emulators, apparently](http://copy.sh/v86/). What the hell happened?
Things aren't that way anymore. For some reason, the web browser has become the all-in-one client for every service. Instead of acting as an HTTP client, your browser is now also capable of running full-blown 3D games, chat rooms, real-time word processors, and [full x86 emulators, apparently](http://copy.sh/v86/). What the hell happened?
## Spoiler alert: Javascript happened
JavaScript happened. That little _scripting_ language invented to, you know, make some hover animation on your page or have dropdowns on your menu bar. Thanks to the introduction of JavaScript (and jQuery especially), developers stopped viewing webpages as Word documents that you can share, and more like canvases. Hover animations are cute and dropdowns are useful. Sure. But when this _scripting_ language starts turning into a _systems_ language (for lack of a better term), you have a problem. When's the last time you used Perl to write an operating system?
Look at the things we do today with JavaScript. We have _full blown frameworks_ that we _compile_ into bundles of _executable code_ in people's browsers. We're basically talking about the equivalent of downloading a binary and executing it on your computer every time you open a _webpage_. Except for a few minor differences. Firstly, it's not really a binary, it's a huge blob of script, which means it must be executed inside some virtual interpreter. For each tab that you're running. Secondly, now you're downloading random scripts from any website that you open, and then [trusting it](https://superlogout.com/) and [running it][1]. You wouldn't hesitate to click a link, but you'd definitely think twice before installing something from an unknown source into your computer, right?
Look at the things we do today with JavaScript. We have _full blown frameworks_ that we _compile_ into bundles of _executable code_ in people's browsers. We're talking about the equivalent of downloading a binary and executing it on your computer every time you open a _webpage_. Except for a few minor differences. Firstly, it's not really a binary, it's a huge blob of script, which means it must be executed inside some virtual interpreter. For each tab that you're running. Secondly, now you're downloading random scripts from any website that you open, and then [trusting it](https://superlogout.com/) and [running it][1]. You wouldn't hesitate to click a link, but you'd definitely think twice before installing something from an unknown source into your computer, right?
On top of that, look at these huge frameworks that almost every company is hiring developers for: React, Angular, Vue. These frameworks help JavaScript developers develop "web apps", meaning your JavaScript is now responsible for things the browser should actually be doing for you: two-way data binding, template rendering, and more. Except now, you're downloading a script and running it inside of a virtual interpreter. And because of technologies like Webpack that bundles all your separate code files together (read: static linking), our browsers can't even use the same framework code from site to site.


@ -1,8 +1,9 @@
---
title: "Setting up IRC with Weechat"
date: 2018-10-18
tags: ["irc"]
---
+++
title = "Setting up IRC with Weechat"
date = 2018-10-18
tags = ["irc"]
+++
I've just recently discovered that weechat has a "relay" mode, which means it can act as a relay server to other clients (for example, my phone). If I leave an instance of weechat running on, say, my server that's always running, it can act as a bouncer and my phone can receive notifications for highlights as well.


@ -1,11 +1,11 @@
---
title: "Twenty years of attacks on rsa with examples"
date: 2018-10-26
toc: true
tags: ["ctf", "crypto", "rsa"]
languages: ["python"]
math: true
---
+++
title = "Twenty years of attacks on rsa with examples"
date = 2018-10-26
toc = true
tags = ["ctf", "crypto", "rsa"]
languages = ["python"]
math = true
+++
There's [a great paper][1] I found by Dan Boneh from 1998 highlighting the
weaknesses of the RSA cryptosystem. I found this paper to be a particularly
@ -18,11 +18,11 @@ cryptosystem works, since there's already a great number of resources on that.
## Factoring large integers
Obviously this is a pretty bruteforce-ish way to crack the cryptosystem, and
probably won't work in time for you to see the result, but can still be
considered an attack vector. This trick works by just factoring the modulus,
$N$. With $N$, finding the private exponent $d$ from the public exponent $e$ is
a piece of cake.
This is a pretty bruteforce-ish way to crack the cryptosystem, and probably
won't work in time for you to see the result, but can still be considered an
attack vector. This trick works by just factoring the modulus, $N$. With $N$,
finding the private exponent $d$ from the public exponent $e$ is a piece of
cake.
Let's choose some small numbers to demonstrate this one (you can follow along in
a Python REPL if you want):
@ -33,7 +33,7 @@ a Python REPL if you want):
>>> c = 875978376
```
$N$ is clearly factorable in this case, and we can use resources like
$N$ is more easily factorable in this case, and we can use resources like
[msieve][7] or [factordb][2] to find smaller primes in this case. Since we know
now that $N = 20717 \times 42557$, we can find the totient of $N$:
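As a rough sketch of the arithmetic that follows (illustration only, assuming the `e` and `c` bindings from the snippet above and Python 3.8+ for the modular inverse):

```py
p, q = 20717, 42557        # factors of n recovered via factordb/msieve
phi = (p - 1) * (q - 1)    # Euler's totient of n = p * q
d = pow(e, -1, phi)        # private exponent: inverse of e modulo phi
m = pow(c, d, p * q)       # decrypt the ciphertext
```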
@ -70,8 +70,8 @@ pairs would be issued to different users and they would share public keys with
each other and keep private keys to themselves.
The problem here is if you have a key pair, and you got someone else's public
key, you could easily derive the private key by just factoring the modulus.
Let's see how this works with a real example now.
key, you could easily derive the private key by factoring the modulus. Let's see
how this works with a real example now.
Since this is a big problem if you were to really use this cryptosystem, I'll be
using actual keys from an actual crypto library instead of the small numbers
@ -128,9 +128,9 @@ It turns out that $k$ is extremely close to $\frac{ed}{N}$:
$$ \frac{ed}{N} = \frac{1 + k\phi(N)}{N} = \frac{1}{N} + \frac{k\phi(N)}{N} $$
$\frac{1}{N}$ is basically 0, and $\phi(N)$ is very close to $N$, so it
shouldn't change the value of $k$ by very much. We now use $\frac{ed}{N}$ to
estimate $k$:
At this scale, we can treat $\frac{1}{N}$ as if it was 0, and $\phi(N)$ is very
close to $N$, so it shouldn't change the value of $k$ by very much. We now use
$\frac{ed}{N}$ to estimate $k$:
$$ \phi(N) = \frac{ed - 1}{\frac{ed}{N}} $$
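In integer arithmetic that estimate might look like the sketch below (not the post's own snippet; it assumes the public exponent is small relative to the primes, so that $\frac{ed}{N}$ falls just short of $k$):

```py
k = (e * d) // N + 1       # e*d/N sits just below k, so round up
phi = (e * d - 1) // k     # e*d = 1 + k*phi(N), so this division is exact
```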
@ -184,10 +184,10 @@ you to sign this with your private key." Let's generate Bob's private key:
>>> M = b"I (Bob) owes Marvin $100,000 USD"
```
Obviously, Bob, an intellectual, will refuse to sign the message. However,
suppose Marvin now transforms his message into a more innocent looking one. He
does this by turning $M$ into $M' = r^eM \mod N$ where r is an integer that's
coprime to $N$:
Bob, being an intellectual, will refuse to sign the message. So now suppose
Marvin transforms his message into a more innocent-looking one. He does this
by turning $M$ into $M' = r^eM \mod N$ where $r$ is an integer that's coprime to
$N$:
```py
>>> from random import randint
Now he asks Bob to sign this more... innocent-looking message. Without
questioning, Bob, an intellectual, signs his life away. Let's say he produces a
signature
$$
\begin{aligned}
$$\begin{aligned}
S' &= (M'^d) \\\
&= (r^e * M)^d \\\
&= r^{ed} * M^d \\\
&= r * M^d \mod N
\end{aligned}
$$
\end{aligned}$$
```py
>>> Sp, = bob.sign(Mp, 0)
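>>> # Hypothetical continuation, not shown in this excerpt (r and N are assumed
>>> # to be bound earlier): since S' = r * M^d (mod N), multiplying by the
>>> # modular inverse of r recovers Bob's signature on the original message.
>>> S = (Sp * pow(r, -1, N)) % N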


@ -1,13 +1,13 @@
---
title: "Magic forms with proc macros: Ideas"
date: 2019-02-01
tags: ["computers", "web"]
languages: ["rust"]
---
+++
title = "Magic forms with proc macros: Ideas"
date = 2019-02-01
tags = ["computers", "web"]
languages = ["rust"]
+++
Procedural macros (proc macros for short) in Rust are incredible because they allow arbitrary pre-compile source transformation, which leads to endless possibilities (and hazards!). But if we take careful advantage of this feature, we can use it to make clean abstractions for messy boilerplate, especially in the case of web forms. <!--more-->
In fact, proc macros are incredibly pervasive around Rust's ecosystem. For example, using the [`serde`][1] serialization/deserialization crate, you can simply write:
In fact, proc macros are incredibly pervasive around Rust's ecosystem. For example, using the [`serde`][1] serialization/deserialization crate, you can write:
```rs
#[derive(Serialize)]
@ -42,10 +42,10 @@ What this would do is add a couple more functions to our form class. Firstly, I'
```html
<form>
<input type="email" name="id" />
<input type="text" name="name" />
<input type="password" name="pass" />
<input type="submit" />
<input type="email" name="id" />
<input type="text" name="name" />
<input type="password" name="pass" />
<input type="submit" />
</form>
```
@ -60,7 +60,7 @@ name: String,
This should generate the following HTML:
```html
<input type="text" name="name" autocomplete="off" />
<input type="text" name="name" autocomplete=off />
```
I realize this is probably not very flexible, since you'd really only be able to use this form in a specific context. But in reality, how much do you really lose by redefining that form?


@ -1,8 +1,8 @@
---
title: "Accept server analogy"
date: 2019-03-04
tags: ["computers"]
---
+++
title = "Accept server analogy"
date = 2019-03-04
tags = ["computers"]
+++
This is just a stupid analogy I thought of recently, but decided to write about it anyway.


@ -1,20 +1,22 @@
---
title: "Password managers"
date: 2020-04-01
tags: ["computers", "things-that-are-good", "privacy"]
---
+++
title = "Password managers"
date = 2020-04-01
tags = ["computers", "things-that-are-good", "privacy"]
+++
Password managers are programs that store passwords for you. With the number of accounts you keep on the web, you generally don't want to store all of them in your head. If you want to see articles on why you should use a password manager NOW, search "reasons to use a password manager" online and any of the articles you find should explain it. Here I'll add some more commentary on top of the traditional arguments.
<!-- more -->
## Don't tick the "Remember master password box" no matter what
Don't tick the "Remember master password box" no matter what
---
How well you remember a password depends on how much you use it. If you open an account, make a password, and stay signed in for a year without ever having to re-login, you'll naturally forget the password. Same deal with password managers; the problem has just been moved another step.
The power of a password manager comes from you continually entering in the same password over and over in order to unlock your other accounts.
## Password managers are good for a lot more than passwords
Password managers are good for a lot more than passwords
---
If you're willing to put sensitive passwords into your password manager, it should be a perfect place to put information that you'd want to avoid writing down in plaintext but want to access easily. This might include:
@ -23,7 +25,8 @@ If you're willing to put sensitive passwords into your password manager, it shou
- Your car's license plate number
- Answers to security questions, which leads into the next point:
## Treat your security questions as passwords
Treat your security questions as passwords
---
Save these in your password manager! "Security" questions are probably the worst idea for security and are more likely to weaken the security of your account than strengthen it. They have multiple fatal flaws (assuming you use security questions truthfully):
@ -33,7 +36,8 @@ Save these in your password manager! "Security" questions are probably the worst
Instead, just treat them as another password! Go into your password manager, generate the longest possible random password that fits into the box, and save it. Since you can give a name to the password, there's no worry of forgetting it or losing it, since it'll be stored among the vault of other passwords that you're hopefully using every day.
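As an illustration, a throwaway way to generate such an answer (any password manager's built-in generator does the same job):

```py
import secrets
import string

# 32 random characters to paste into the "security question" box
alphabet = string.ascii_letters + string.digits
print("".join(secrets.choice(alphabet) for _ in range(32)))
```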
## Don't trust extensions that fill in your password automatically
Don't trust extensions that fill in your password automatically
---
Some password managers, like LastPass, have browser extensions that automatically fill in password boxes when you open the page.


@ -1,14 +1,14 @@
---
title: "Tracking links in email"
date: 2021-06-17
tags: ["email", "computers", "things-that-are-bad", "privacy"]
---
+++
title = "Tracking links in email"
date = 2021-06-17
tags = ["email", "computers", "things-that-are-bad", "privacy"]
+++
You probably get emails every day, and spend a lot of time reading them. And
whenever someone performs an action or does something in vast quantities, you
_bet_ the data giants have figured out a way to capitalize on it. For many
years consumer privacy has basically gone unnoticed, and invasive tracking has
grown [viral][1]. <!--more-->
_bet_ the data giants have figured out a way to capitalize on it. For many years
consumer privacy has gone unnoticed, and invasive tracking has grown [viral][1].
<!--more-->
Arguably, if you are someone who runs a business off of writing periodic
newsletters that are distributed via email, you might want some statistics on
@ -49,18 +49,18 @@ making a request to that server, giving up information about the time, place,
client, OS, and all sorts of other information that greedy data collection
companies are waiting to snatch up.
Of course, regular users notice nothing: these links are usually hidden behind
buttons, text, or even the original URL itself. Once they click it, the website
silently logs all the data it receives about the user, and then redirects the
user to the original destination.
Regular users notice nothing: these links are usually hidden behind buttons,
text, or even the original URL itself. Once they click it, the website silently
logs all the data it receives about the user, and then redirects the user to the
original destination.
The senders usually aren't at fault either. Sending email is tricky, with all
the infrastructure set up to block out spam, so the majority of people who send
bulk mail (newsletters, websites that need to confirm your email, etc.) all go
through companies that handle this for them. Of course, being the middlemen who
actually get the mail out the door, they're free to replace the links with
whatever they want, and many of these companies advertise it as a feature to
get more "insight" into how your emails are doing.
through companies that handle this for them. Being the middlemen who actually
get the mail out the door, they're free to replace the links with whatever they
want, and many of these companies advertise it as a feature to get more
"insight" into how your emails are doing.
Even worse, the original senders aren't the only ones getting the info, either.
These middlemen could hold on to the data and there's no saying they can't use


@ -1,11 +1,11 @@
---
title: "Sending https requests from scratch"
date: 2021-07-05
draft: true
toc: true
tags: ["computers", "web", "crypto"]
languages: ["python"]
---
+++
title = "Sending https requests from scratch"
date = 2021-07-05
draft = true
toc = true
tags = ["computers", "web", "crypto"]
languages = ["python"]
+++
Every now and then, I return to this age-old question of _exactly_ how hard would it be to write a web browser from scratch? I hear some interviewers ask their candidates to describe the process your browser takes to actually put a webpage on your screen, but no doubt that's a simplification of a process from 20 years ago. <!--more-->
@ -28,7 +28,7 @@ With that out of the way, let's jump in!
## URL Parsing
This part is basically just a chore. URLs are defined in [RFC 3986][1], but we'll cheat a bit and just get the important parts we want for sending a request. First, I'll write out a regex for actually matching the parts we want:
This part is just a chore. URLs are defined in [RFC 3986][1], but we'll cheat a bit and just get the important parts we want for sending a request. First, I'll write out a regex for actually matching the parts we want:
```py
import re
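# (Illustration only; the post's actual pattern is not part of this excerpt.)
# RFC 3986 Appendix B gives a reference regex that splits a URL into
# scheme, authority, path, query, and fragment:
URL_RE = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")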
@ -96,7 +96,7 @@ def wrap_handshake(htype, hdata):
### Client Hello
TLS starts with the client sending a `ClientHello` message (defined in section 4.1.2 of the RFC), which basically starts the handshake off with some basic details about what the client can do. Now's probably a good time to decide on some basics, like which ciphers we'll be using to communicate.
TLS starts with the client sending a `ClientHello` message (defined in section 4.1.2 of the RFC), which starts the handshake off with some basic details about what the client can do. Now's probably a good time to decide on some basics, like which ciphers we'll be using to communicate.
#### Cipher Suite
@ -299,9 +299,9 @@ Piece of cake.
Let's walk through each of the ciphers and algorithms we're going to need one more time:
- `ecdsa_secp256r1_sha256`
- ECDSA is the elliptical-curve signature algorithm; basically it can sign some information using the elliptical-curve private key, and anyone can verify using the corresponding public key that the person who owns the key has created that signature.
- secp256r1 just gives the name of a set of established parameters for a curve.
- SHA256 is a hashing algorithm, which creates a unique fingerprint of a piece of information that can't be reversed back to the original. Python's `hashlib` library provides this function for us, so we don't have to implement it ourselves.
+ ECDSA is the elliptical-curve signature algorithm; it can sign some information using the elliptical-curve private key, and anyone can verify using the corresponding public key that the person who owns the key has created that signature.
+ secp256r1 just gives the name of a set of established parameters for a curve.
+ SHA256 is a hashing algorithm, which creates a unique fingerprint of a piece of information that can't be reversed back to the original. Python's `hashlib` library provides this function for us, so we don't have to implement it ourselves.
#### Naive Elliptical Curve Implementation
@ -395,6 +395,8 @@ def client_hello_extensions(hostname: str):
## `request`-like API
## Conclusion
What did we learn? Don't do this shit yourself, it's not worth it. We'll probably be on HTTP3 within the next year. Just import `requests` and be done with it.


@ -1,29 +1,28 @@
---
title: "End-to-end encryption is useless without client freedom"
date: 2021-10-31
tags: ["computers", "privacy"]
---
+++
title = "End-to-end encryption is useless without client freedom"
date = 2021-10-31
tags = ["computers", "privacy"]
+++
Today, many companies claim to provide "end-to-end encryption" of user data,
whether it be text messages, saved pictures, or important documents. But what
does this actually mean for your data? I'll explain what "non-end-to-end"
encryption is, why end-to-end encryption is important, and also when it might
be absolutely meaningless.
<!--more-->
be absolutely meaningless.<!--more-->
> If you just want to read about end-to-end encryption, click [here][1].
> Otherwise, I'll start the story all the way back to how computers talk to
> each other.
## A game of telephone in a noisy room
A game of telephone in a noisy room
---
Computer networks essentially operate like a bunch of people yelling at each
other at a public gathering, where everyone is kind of hearing everyone else's
messages, but only really paying attention to ones addressed to them. Let's say
I wanted to grab some Chipotle with my roommate. I'd yell "HEY NATHAN, WANNA GET
CHIPOTLE?" over this public network, where Nathan would see his name, identify
it as a message that is intended for him, and then reply accordingly. Notably,
it as a message that is intended for him, and then reply accordingly. Notably,
everyone _else_ listening to the network also hears this, and knows that I'm
itching to get some Mexican food.
@ -31,11 +30,12 @@ That's not even the worst part, because well... how does your computer connect
to the internet? Your router hears your computer's message, and passes it
through a series of middlemen, who all perform this broadcasting ritual through
some local network, until it gets to wherever your computer wanted to talk to in
the first place. But in order for the middlemen to pass on the message, they'd
the first place. But in order for the middlemen to pass on the message, they'd
have to hear the message, so now my lunch has become a public gathering known to
everyone who's heard or passed on the message.
## Encryption saves the day
Encryption saves the day
---
That's where **encryption** comes in. Encryption lets me change the message to
something that the middlemen and everyone else listening on the network can't
@ -62,7 +62,8 @@ encryption to make sure no one steals my identity while I'm sending it, and
encryption _at rest_ to make sure someone breaking into Google won't be able to
just pull the hard drive out and read the files off it.
## Two halves don't equal a whole
Two halves don't equal a whole
---
It turns out just putting together these two types of encryption isn't enough.
There's someone else we haven't protected ourselves against in this case, which
@ -78,9 +79,8 @@ expect, and many industries have laws like [HIPAA][hipaa] and [FERPA][ferpa] to
make sure the people handling your data don't leak it.
But we don't have to just _trust_ them on that, because we already _know_ how
to protect data from middlemen who are simply taking a message and sending it
somewhere else unchanged, like the ISPs from our networks. We just need _more_
encryption!
to protect data from middlemen who are taking a message and sending it somewhere
else unchanged, like the ISPs from our networks. We just need _more_ encryption!
**End-to-end encryption** is just encrypting the data in a way that the only
parties allowed to read the data are the people it was intended for. Password
@ -91,7 +91,8 @@ then decrypts it offline. [Signal][signal] famously provides end-to-end
encrypted chat, so that no one, not even the government[^1], will be able to
read the messages you send if they're not the intended recipient.
## It's still not enough
It's still not enough {#not-enough}
---
End-to-end encryption seems like it should be the end of the story, but if
there's one thing that can undermine the encryption, it's the program that's
@ -139,7 +140,8 @@ run services is insufficient, it's safe to say that trusting companies to make
client software that act in the interest of their users is just as useless as
trusting companies to make services that act in the interest of their users.
## What can i do?
What can i do?
---
Although inconvenient, trusting different vendors for different pieces of this
technological assembly line is the best way to prevent it from becoming abused.
@ -168,7 +170,8 @@ multiple apps and servers, and servers can federate with each other using an
open protocol. I would strongly recommend people who are interested in privacy
to consider it.
## Conclusion
Conclusion
---
Why care? This might just seem to be some superficial political concern by
privacy advocates who warn of dangerous edge cases that only matter to people
@ -179,25 +182,22 @@ afraid of, but tech megacorps who possibly have even more power.
We live in a digital world, so it's important to know how it works and who's in
control.
[^1]:
Governments and other parties with enough computational resources may
still be able to undermine specific levels of security, or just [threaten you
personally][wrench] until they get what they want.
[^1]: Governments and other parties with enough computational resources may
still be able to undermine specific levels of security, or just [threaten you
personally][wrench] until they get what they want.
[^2]:
Large corporations typically still have majority representation in the
committees that decide on the most impactful specifications, but these are
still made available to researchers who can analyze and critique it.
[^2]: Large corporations typically still have majority representation in the
committees that decide on the most impactful specifications, but these are
still made available to researchers who can analyze and critique it.
[^3]:
Not exactly; encryption by default is not authenticated. That means while
the data is protected in transit, there's no guarantee that the recipient is
actually who they say they are. I know only the recipient received my message,
but I don't know for sure the recipient is Nathan. In practice, browsers use
[PKI][pki] infrastructure to solve this, which relies on a certificate chain
that is distributed by browser or operating system vendors.
[^3]: Not exactly; encryption by default is not authenticated. That means while
the data is protected in transit, there's no guarantee that the recipient is
actually who they say they are. I know only the recipient received my message,
but I don't know for sure the recipient is Nathan. In practice, browsers use
[PKI][pki] infrastructure to solve this, which relies on a certificate chain
that is distributed by browser or operating system vendors.
[1]: #its-still-not-enough
[1]: {{< ref "#not-enough" >}}
[csam]: https://www.apple.com/child-safety/pdf/CSAM_Detection_Technical_Summary.pdf
[ferpa]: https://en.wikipedia.org/wiki/Family_Educational_Rights_and_Privacy_Act


@ -1,11 +1,12 @@
---
title: "building a formal cek machine in agda"
draft: true
date: 2022-02-02
tags: ["computer-science", "programming-languages", "formal-verification", "lambda-calculus"]
languages: ["agda"]
toc: true
---
+++
title = "building a formal cek machine in agda"
draft = true
date = 2022-02-02
tags = ["computer-science", "programming-languages", "formal-verification", "lambda-calculus"]
languages = ["agda"]
layout = "single"
toc = true
+++
<!--more-->
@ -19,36 +20,35 @@ My lambda calculus implemented `call/cc` on top of a CEK machine.
<details>
<summary><b>Why is this interesting?</b></summary>
Reasoning about languages is one way of ensuring whole-program correctness.
Building up these languages from foundations grounded in logic helps us
achieve our goal with more rigor.
Reasoning about languages is one way of ensuring whole-program correctness.
Building up these languages from foundations grounded in logic helps us
achieve our goal with more rigor.
As an example, suppose I wrote a function that takes a list of numbers and
returns the maximum value. Mathematically speaking, this function would be
_non-total_; an input consisting of an empty set would not produce reasonable
output. If this were a library function I'd like to tell people who write code
that uses this function "don't give me an empty list!"
As an example, suppose I wrote a function that takes a list of numbers and
returns the maximum value. Mathematically speaking, this function would be
_non-total_; an input consisting of an empty set would not produce reasonable
output. If this were a library function I'd like to tell people who write code
that uses this function "don't give me an empty list!"
But just writing this in documentation isn't enough. What we'd really like is
for a tool (like a compiler) to tell any developer who is trying to pass an
empty list into our maximum function "You can't do that." Unfortunately, most
of the popular languages being used today have no way of describing "a list
that's not empty."
But just writing this in documentation isn't enough. What we'd really like is
for a tool (like a compiler) to tell any developer who is trying to pass an
empty list into our maximum function "You can't do that." Unfortunately, most
of the popular languages being used today have no way of describing "a list
that's not empty."
We still have a way to prevent people from running into this problem, though
it involves pushing the problem to runtime rather than compile time. The
maximum function could return an "optional" maximum. Some languages'
implementations of optional values force programmers to handle the "nothing"
case, while others ignore it silently. But in the more optimistic case, even
if the list was empty, the caller would have handled it and treated it
accordingly.
This isn't a pretty way to solve this problem. _Dependent types_ gives us
tools to solve this problem in an elegant way, by giving the type system the
ability to contain values. This also opens its own can of worms, but for
questions about program correctness, it is more valuable than depending on
catching problems at runtime.
We still have a way to prevent people from running into this problem, though
it involves pushing the problem to runtime rather than compile time. The
maximum function could return an "optional" maximum. Some languages'
implementations of optional values force programmers to handle the "nothing"
case, while others ignore it silently. But in the more optimistic case, even
if the list was empty, the caller would have handled it and treated it
accordingly.
This isn't a pretty way to solve this problem. _Dependent types_ gives us
tools to solve this problem in an elegant way, by giving the type system the
ability to contain values. This also opens its own can of worms, but for
questions about program correctness, it is more valuable than depending on
catching problems at runtime.
</details>
## Lambda calculus
@ -59,19 +59,19 @@ be represented in a lambda calculus is some combination of terms. A term can
have several constructors:
- **Var.** This is just a variable, like `x` or `y`. During evaluation, a
variable can resolve to a value in the evaluation environment by name. If
the environment says `{ x = 5 }`, then evaluating `x` would result in 5.
variable can resolve to a value in the evaluation environment by name. If
the environment says `{ x = 5 }`, then evaluating `x` would result in 5.
- **Abstraction, or lambda (λ).** An _abstraction_ is a term that describes some
other computation. From an algebraic perspective, it can be thought of as a
function with a single argument (i.e f(x) = 2x is an abstraction, although
it would be written `(λx.2x)`)
other computation. From an algebraic perspective, it can be thought of as a
function with a single argument (i.e f(x) = 2x is an abstraction, although
it would be written `(λx.2x)`)
- **Application.** Application is sort of the opposite of abstraction, exposing
the computation that was abstracted away. From an algebraic perspective,
this is just function application (i.e applying `f(x) = 2x` to 3 would
result in 2\*3. Note that only a simple substitution has been done and
further evaluation is required to reduce 2\*3)
the computation that was abstracted away. From an algebraic perspective,
this is just function application (i.e applying `f(x) = 2x` to 3 would
result in 2\*3. Note that only a simple substitution has been done and
further evaluation is required to reduce 2*3)
### Why?
@ -86,7 +86,7 @@ like this:
- Let `z` represent zero.
- Let `s` represent a "successor", or increment function. `s(z)` represents 1,
`s(s(z))` represents 2, and so on.
`s(s(z))` represents 2, and so on.
In lambda calculus terms, this would look like:
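As a rough stand-in for that notation, the same encoding written with Python lambdas (illustration only):

```py
zero = lambda s: lambda z: z                     # apply the successor zero times
succ = lambda n: lambda s: lambda z: s(n(s)(z))  # apply s one more time than n
one = succ(zero)
two = succ(one)
to_int = lambda n: n(lambda x: x + 1)(0)         # read a numeral back as an int
assert to_int(two) == 2
```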
@ -114,7 +114,7 @@ expand infinitely. That makes sense given its purpose: to find a _fixed point_
of whatever function you pass in.
> As an example, the fixed-point of the function f(x) = sqrt(x) is 1. That's
> because f(1) = 1. The Y combinator attempts to find the fixed point by simply
> because f(1) = 1. The Y combinator attempts to find the fixed point by
> applying the function multiple times. In the untyped lambda calculus, this can
> be used to implement simple (but possibly unbounded) recursion.
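Under eager evaluation the Y combinator itself loops forever, so a sketch of the idea in Python has to use the eta-expanded variant usually called Z (illustration, not from the post):

```py
Z = lambda f: (lambda x: f(lambda v: x(x)(v)))(lambda x: f(lambda v: x(x)(v)))

# Recursion without the function ever naming itself:
fact = Z(lambda rec: lambda n: 1 if n == 0 else n * rec(n - 1))
assert fact(5) == 120
```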
@ -149,12 +149,12 @@ as what the function is expecting.
A semi-formal definition for STLC terms would look something like this:
- **Var.** Same as before, it's a variable that can be looked up in the
environment.
environment.
- **Abstraction, or lambda (λ).** This is a function that carries three pieces
of information: (1) the name of the variable that its input will be
substituted for, (2) the _type_ of the input, and (3) the body in which the
substitution will happen.
of information: (1) the name of the variable that its input will be
substituted for, (2) the _type_ of the input, and (3) the body in which the
substitution will happen.
- **Application.** Same as before.
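A sketch of those three constructors as a small Python AST (the post builds this in Agda, so this only illustrates the shape):

```py
from dataclasses import dataclass
from typing import Union

@dataclass
class Var:
    name: str            # looked up in the environment by name

@dataclass
class Lam:
    arg: str             # (1) variable the input is substituted for
    arg_type: str        # (2) type of the input, e.g. "Nat"
    body: "Term"         # (3) body in which the substitution happens

@dataclass
class App:
    fn: "Term"
    arg: "Term"

Term = Union[Var, Lam, App]
```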
@ -171,8 +171,8 @@ Notation: (`x :: T` means `x` has type `T`, and `f · x` means `f` applied to
This also means that some values are now unrepresentable:
- `isEven(λx.2x) :: (Nat -> Bool) · (Nat -> Nat)` doesn't work because the type
of `λx.2x :: Nat -> Nat` can't be used as an input for `isEven`, which is
expecting a `Nat`.
of `λx.2x :: Nat -> Nat` can't be used as an input for `isEven`, which is
expecting a `Nat`.
We have a good foundation for writing programs now, but this by itself can't
qualify as a system for computation.


@ -1,14 +1,14 @@
---
title: "The Cyber Grabs CTF: Unbr34k4bl3 (942)"
date: 2022-02-02
tags: ["ctf", "crypto", "rsa"]
languages: ["python"]
math: true
toc: true
---
+++
title = "The Cyber Grabs CTF: Unbr34k4bl3 (942)"
date = 2022-02-02
tags = ["ctf", "crypto", "rsa"]
languages = ["python"]
layout = "single"
math = true
toc = true
+++
Crypto challenge Unbr34k4bl3 from the Cyber Grabs CTF.
<!--more-->
> No one can break my rsa encryption, prove me wrong !!
@ -26,11 +26,11 @@ Looking at the source code, this challenge looks like a typical RSA challenge at
first, but there are some important differences to note:
- $n = pqr$ (line 34). This is a twist but RSA strategies can easily be
extended to 3 prime components.
extended to 3 prime components.
- $p, q \equiv 3 \mod 4$ (line 19). This suggests that the cryptosystem is
actually a [Rabin cryptosystem][Rabin].
actually a [Rabin cryptosystem][Rabin].
- We're not given the public keys $e_1$ and $e_2$, but they are related through
$x$.
$x$.
## Finding $e_1$ and $e_2$
@ -42,13 +42,11 @@ $$ 1 + e_1 + e_1^2 + \cdots + e_1^x = 1 + e_2 + e_2^2 $$
Taking the entire equation $\mod e_1$ gives us:
$$
\begin{aligned}
$$\begin{aligned}
1 &\equiv 1 + e_2 + e_2^2 \mod e_1 \\\
0 &\equiv e_2 + e_2^2 \\\
0 &\equiv e_2(1 + e_2)
\end{aligned}
$$
\end{aligned}$$
This means there are two possibilities: either $e_1 = e_2$ or $e_1$ is even
(since we know $e_2$ is a prime). The first case isn't possible, because with $x
@ -65,84 +63,68 @@ and $\boxed{e_2 = 5}$ gives us a value that make $e_2$ prime.
We're not actually given $p$ or $q$, but we are given $ip = p^{-1} \mod q$ and
$iq = q^{-1} \mod p$. In other words:
$$
\begin{aligned}
$$\begin{aligned}
p \times ip &\equiv 1 \mod q \\\
q \times iq &\equiv 1 \mod p
\end{aligned}
$$
\end{aligned}$$
We can rewrite these equations without the mod by introducing variables $k_1$
and $k_2$ to be arbitrary constants that we solve for later:
$$
\begin{aligned}
$$\begin{aligned}
p \times ip &= 1 + k_1q \\\
q \times iq &= 1 + k_2p
\end{aligned}
$$
\end{aligned}$$
We'll be trying to use these formulas to create a quadratic that we can use to
eliminate $k_1$ and $k_2$. Multiplying these together gives:
$$
\begin{aligned}
$$\begin{aligned}
(p \times ip)(q \times iq) &= (1 + k_1q)(1 + k_2p) \\\
pq \times ip \times iq &= 1 + k_1q + k_2p + k_1k_2pq
\end{aligned}
$$
\end{aligned}$$
I grouped $p$ and $q$ together here because it's important to note that since we
have $x$, we know $r$ and thus $pq = \frac{n}{r}$. This means that for purposes
of solving the equation, $pq$ is a constant to us. This actually introduces an
interesting structure on the right hand side, we can create 2 new variables:
$$
\begin{aligned}
$$\begin{aligned}
\alpha &= k_1q \\\
\beta &= k_2p
\end{aligned}
$$
\end{aligned}$$
Substituting this into our equation above we get:
$$
\begin{aligned}
$$\begin{aligned}
pq \times ip \times iq &= 1 + \alpha + \beta + \alpha\beta
\end{aligned}
$$
\end{aligned}$$
Recall from whatever algebra class you last took that $(x - x_0)(x - x_1) = x^2
\- (x_0 + x_1)x + x_0x_1$. Since we have both $\alpha\beta$ and $(\alpha +
\beta)$ in our equation, we can try to look for a way to isolate them in order
to create our goal.
$$
\begin{aligned}
$$\begin{aligned}
pq \times ip \times iq &= 1 + k_1q + k_2p + k_1k_2pq \\\
k_1k_2pq &= pq \times ip \times iq - 1 - k_1q - k_2p \\\
k_1k_2 &= ip \times iq - \frac{1}{pq} - \frac{k_1}{p} - \frac{k_2}{q}
\end{aligned}
$$
\end{aligned}$$
$\frac{1}{pq}$ is basically $0$, and since $k_1$ and $k_2$ are both smaller than
$p$ or $q$, then we'll approximate this using $k_1k_2 = ip \times iq - 1$. Now
that $k_1k_2$ has become a constant, we can create the coefficients we need:
We can treat $\frac{1}{pq}$ as $0$, and since $k_1$ and $k_2$ are both smaller
than $p$ or $q$, then we'll approximate this using $k_1k_2 = ip \times iq - 1$.
Now that $k_1k_2$ has become a constant, we can create the coefficients we need:
$$\begin{aligned}
\alpha + \beta &= pq \times ip \times iq - 1 - k_1k_2pq \\\
\alpha\beta &= k_1k_2pq
\end{aligned}$$
$$\begin{aligned}
(x - \alpha)(x - \beta) &= 0 \\\
x^2 - (\alpha + \beta)x + \alpha\beta &= 0 \\\
x &= \frac{(\alpha+\beta) \pm \sqrt{(\alpha+\beta)^2 - 4\alpha\beta}}{2}
\end{aligned}$$
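Concretely, this final step could look something like the rough sketch below. The variable names are hypothetical (`ip`, `iq`, and `pq = n // r` are assumed to already be known), and the closing `gcd` is just one way to peel $q$ back out of $\alpha = k_1 q$, not necessarily what the original script does:

```python
from math import gcd, isqrt

# coefficients of the quadratic, using the approximation k1*k2 = ip*iq - 1
k1k2 = ip * iq - 1
sum_ab = pq * ip * iq - 1 - k1k2 * pq   # alpha + beta
prod_ab = k1k2 * pq                     # alpha * beta

# quadratic formula; the discriminant should be a perfect square when the
# approximation above is exact enough
disc = isqrt(sum_ab * sum_ab - 4 * prod_ab)
alpha = (sum_ab + disc) // 2            # alpha = k1 * q
beta = (sum_ab - disc) // 2             # beta  = k2 * p

# alpha shares the factor q with pq, so gcd recovers q, and then p
q = gcd(alpha, pq)
p = pq // q
```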
Putting this into Python, it looks like:
@ -204,7 +186,7 @@ print(long_to_bytes(m2))
# ... The last part of the flag is: 8ut_num83r_sy5t3m_15_3v3n_m0r3_1nt3r35t1n6} ...
```
This trick won't work with $c_1$ however:
This trick won't work with $c_1$ though:
```py
d1 = pow(e1, -1, phi)
@ -216,26 +198,26 @@ possibly be a $d_1$ such that $2 \cdot d_1 \equiv 1 \mod \phi$. According to
[Wikipedia][Rabin], the decryption for a standard two-prime $n$ takes 3 steps:
1. Compute the square root of $c \mod p$ and $c \mod q$:
- $m_p = c^{\frac{1}{4}(p + 1)} \mod p$
- $m_q = c^{\frac{1}{4}(q + 1)} \mod q$
2. Use the extended Euclidean algorithm to find $y_p$ and $y_q$ such that $y_p
\cdot p + y_q \cdot q = 1$.
3. Use the Chinese remainder theorem to find the roots of $c$ modulo $n$:
- $r_1 = (y_p \cdot p \cdot m_q + y_q \cdot q \cdot m_p) \mod n$
- $r_2 = n - r_1$
- $r_3 = (y_p \cdot p \cdot m_q - y_q \cdot q \cdot m_p) \mod n$
- $r_4 = n - r_3$
4. The real message could be any $r_i$, but we don't know which.
Converting this to work with $n = pqr$, it looks like:
1. Compute the square root of $c \mod p$, $c \mod q$, and $c \mod r$:
- $m_p = c^{\frac{1}{4}(p + 1)} \mod p$
- $m_q = c^{\frac{1}{4}(q + 1)} \mod q$
- $m_r = c^{\frac{1}{4}(r + 1)} \mod r$
2. Using the variable names from [AoPS][CRT]'s definition of CRT:
- For $k \in \\{ p, q, r \\}, b_k = \frac{n}{k}$.
- For $k \in \\{ p, q, r \\}, a_k \cdot b_k \equiv 1 \mod k$.
3. Let $r = \displaystyle\sum_k^{\\{ p, q, r \\}} \pm (a_k \cdot b_k \cdot m_k) \mod n$.
4. The real message could be any $r$, but we don't know which.
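A minimal sketch of that adapted procedure (my own reconstruction, not the post's solve script), assuming `p`, `q`, `r`, `n`, and the ciphertext `c` are already known and that all three primes are $\equiv 3 \mod 4$:

```python
from itertools import product

def rabin_roots(c, n, primes):
    """Candidate square roots of c modulo n = p*q*r."""
    # Step 1: a square root of c modulo each prime (valid when prime ≡ 3 mod 4).
    m = {k: pow(c, (k + 1) // 4, k) for k in primes}
    # Step 2: CRT coefficients, b_k = n / k and a_k = b_k^{-1} mod k.
    b = {k: n // k for k in primes}
    a = {k: pow(b[k], -1, k) for k in primes}
    # Step 3: combine with every choice of signs, giving up to 8 candidates.
    roots = set()
    for signs in product([1, -1], repeat=len(primes)):
        x = sum(s * a[k] * b[k] * m[k] for s, k in zip(signs, primes)) % n
        roots.add(x)
    return roots

# Step 4: we don't know which root is the message, so try them all, e.g.
# for root in rabin_roots(c, n, [p, q, r]):
#     print(long_to_bytes(root))   # long_to_bytes is pycryptodome's
```

Each choice of signs yields one candidate root, so at most eight candidates come out and the plausible-looking plaintext has to be picked out by hand.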

View file

@ -1,8 +1,8 @@
---
title: "Clangd in Nix"
date: 2022-03-03
tags: ["nixos"]
---
+++
title = "Clangd in Nix"
date = 2022-03-03
tags = ["nixos"]
+++
I've been using [Nix][NixOS] a lot recently since it handles dependency
management very cleanly, but one gripe that I've been having is that when I'm

View file

@ -1,11 +1,11 @@
---
title: "Learn by Implementing Elliptic Curve Crypto"
date: 2022-03-04
tags: ["crypto", "learn-by-implementing"]
draft: true
math: true
toc: true
---
+++
title = "Learn by Implementing Elliptic Curve Crypto"
date = 2022-03-04
tags = ["crypto", "learn-by-implementing"]
draft = true
math = true
toc = true
+++
Good places to start (in terms of usefulness):
@ -80,7 +80,6 @@ import (
```
> This is a [literate document][literate]. You can run this blog post using [Markout]:
>
> ```
> TODO:
> ```
@ -101,11 +100,9 @@ Addition on $P$ and $Q$ is defined by first finding the line $PQ$, determining
the point $-R$ where it intersects the curve again, and then returning $R$. We
can find the line $PQ$ by using high school geometry:
$$\begin{aligned}
(y - y_0) = m(x - x_0)
\end{aligned}$$
```go
func (A Point) Add(B Point) Point {

View file

@ -1,14 +1,12 @@
---
title: "Installing NixOS on ZFS with encryption"
date: 2022-05-09
tags: ["nixos", "linux", "setup"]
toc: true
---
+++
title = "Installing NixOS on ZFS with encryption"
date = 2022-05-09
tags = ["nixos", "linux", "setup"]
toc = true
+++
I finally switched over to NixOS for my desktop, and here is my install process.
<!--more--> Annoyingly enough, the biggest non-declarative part of Nix is this
initial setup phase, and I made several mistakes during the install process, so
I'm documenting the process here so I can remember for next time.
@ -19,7 +17,7 @@ I'm documenting the process here so I can remember for next time.
- SSD1: 1TB Samsung SSD 860 (encrypted), which I'm migrating off of
- SSD2: 2TB Crucial MX500 (encrypted), which I'm migrating to
- HDD: 3TB HITACHI HUA72303 (unencrypted), which serves as storage for music
and games.
I already have my [Nix flake][1] setup for my other machines, but of those only
my server runs NixOS. Instead, all my other machines use Arch Linux with just

View file

@ -1,8 +1,8 @@
---
title: "Mastery-Based Learning"
date: 2022-07-24
tags: ["education"]
---
+++
title = "Mastery-Based Learning"
date = 2022-07-24
tags = ["education"]
+++
A thought I've been brewing probably since undergrad is the idea of
mastery-based education for skill-based practices. Directly inspired by the
@ -21,9 +21,9 @@ specific purposes:
- **Dependency.** The linked topic needs some percentage of mastery in order
to best experience the current topic.
- **Spill.** Not really sure what a good term for this would be, but basically
mastery of the current topic would result in some percentage of "spilled"
mastery gain for the linked topic.
- **Spill.** Not really sure what a good term for this would be, but mastery of
the current topic would result in some percentage of "spilled" mastery gain
for the linked topic.
The dependency aspect allows people to work backwards, starting from what they
don't know and being able to query what the required context is. They can build
@ -51,8 +51,8 @@ The vision would be something like this:
2. Then, the student goes off and learns some related material. After a length
of time has passed, they are quizzed on the first topic again. By this time,
their mastery score should fall a bit simply on the basis of "forgetfulness
over time".
their mastery score should fall a bit on the basis of "forgetfulness over
time".
3. At the end of the semester, if they have reached required thresholds of
mastery in each required topic, they will pass the class.

View file

(binary image file changed; 24 KiB before and after)

View file

@ -1,9 +1,10 @@
---
title: "UIUCTF 2022 Writeups"
date: 2022-08-01
tags: ["ctf"]
math: true
---
+++
title = "UIUCTF 2022 Writeups"
date = 2022-08-01
tags = ["ctf"]
layout = "single"
math = true
+++
Last weekend I took some time on Friday to do UIUCTF 2022. I placed 102nd with
462 points. Here are the writeups to the challenges I solved. My full workspace
@ -46,8 +47,8 @@ foreach ($allowed_extensions as $extension) {
The flaw here is that the file extension is only checked for existence, not that
it actually occurs at the end of the file. As a result, the web server will
guess the kind of data in the file based on the final extension. We can simply
upload a file named `hello.jpg.php` and it will pass this.
guess the kind of data in the file based on the final extension. We can upload a
file named `hello.jpg.php` and it will pass this.
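To see the difference, here is a small Python rendition of the two checks (the challenge itself is PHP, and `allowed_extensions` is a hypothetical list):

```python
allowed_extensions = ["jpg", "png", "gif"]
filename = "hello.jpg.php"

# The flawed check: only asks whether an allowed extension appears *somewhere*.
flawed_ok = any(ext in filename for ext in allowed_extensions)    # True

# A stricter check: the allowed extension must be the *final* extension.
strict_ok = filename.rsplit(".", 1)[-1] in allowed_extensions     # False
```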
The Dockerfile in the handout helpfully tells us the location of the flag, so
here is the file I uploaded:
@ -73,7 +74,7 @@ all of the objects except the two flag halves.
[blender]: https://www.blender.org
![Hiding blender objects](../../../assets/ctf/blender-objects.png)
![Hiding blender objects](blender-objects.png)
This reveals two QR codes that can be combined into the flag.
@ -114,8 +115,8 @@ to figure it out.
> anyone who may want it. https://military-grade-encryption-web.chal.uiuc.tf/
I'm not really sure what the clever way to solve this was, but all the numbers
were small enough to be brute forced, so that's what I did. Simply reverse the
process used and look for a string beginning with `uiuctf{`.
were small enough to be brute forced, so that's what I did: reverse the process
used and look for a string beginning with `uiuctf{`.
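The overall shape of that brute force, as a purely hypothetical sketch (the real key space and the `reverse_step` function depend on the challenge script, which isn't shown here):

```python
# hypothetical: reverse_step undoes the challenge's encoding for one guess
for guess in range(1, 1_000_000):   # assumption: the search space is tiny
    candidate = reverse_step(ciphertext, guess)
    if candidate.startswith(b"uiuctf{"):
        print(candidate)
        break
```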
[solve script](https://git.sr.ht/~mzhang/uiuctf-2022/tree/master/item/crypto/military-grade-encryption/solve.py)
@ -128,7 +129,7 @@ challenge of RSA. This time, rather than starting with a modulus and trying to
discover the private exponent, we are given the private exponent, and trying to
find the modulus.
As a reminder, once we find $N$, we can simply evaluate $c^d \mod N$ to
As a reminder, once we find $N$, we can evaluate $c^d \mod N$ to
determine the final message.
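That final step is plain modular exponentiation; assuming `c` and `d` hold the handout values and `N` is the recovered modulus, it's a one-liner:

```python
from Crypto.Util.number import long_to_bytes

m = pow(c, d, N)        # c^d mod N
print(long_to_bytes(m))
```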
First, we know that the relationship between $e$ and $d$ is that $ed \equiv 1
@ -145,8 +146,8 @@ of 8.
[external factoring service]: https://www.alpertron.com.ar/ECM.HTM
Trusty old combinatorics tells us ${16 \choose 8} = 12870$, which is easily
iterable within a matter of seconds. After picking 8 primes, we simply run
through the same process as `gen_prime` in order to generate our $p$ and $q$:
iterable within a matter of seconds. After picking 8 primes, we run through the
same process as `gen_prime` in order to generate our $p$ and $q$:
```python
for perm in tqdm(perms):
@ -196,8 +197,8 @@ This means that even with the absence of one of the required points, we can
still reasonably recover the curve with the added information that all
coefficients are less than $500,000$.
I simply threw all of the constraints we are given into the z3 solver, and
within milliseconds the answer was given. After that it was just a matter of
hooking it up to the challenge server and waiting for the flag.
I threw all of the constraints we are given into the z3 solver, and within
milliseconds the answer was given. After that it was just a matter of hooking it
up to the challenge server and waiting for the flag.
[solve script](https://git.sr.ht/~mzhang/uiuctf-2022/tree/master/item/crypto/wringing-rings/solve.py)
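For flavor, a sketch of what such a z3 setup could look like. Everything named here (the polynomial form, `points`, the coefficient count) is a stand-in; the real constraints live in the linked solve script:

```python
from z3 import Ints, Solver, sat

# stand-ins: the real curve equation and point values come from the challenge
points = [(1, 1337), (2, 31337)]

a0, a1, a2, a3 = Ints("a0 a1 a2 a3")
s = Solver()
for a in (a0, a1, a2, a3):
    s.add(a >= 0, a < 500_000)        # the bound given by the challenge
for x, y in points:
    s.add(a0 + a1 * x + a2 * x**2 + a3 * x**3 == y)

if s.check() == sat:
    print(s.model())
```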

View file

@ -1,11 +1,12 @@
---
title: "Higher Inductive Types"
date: 2022-09-20
tags: ["type-theory"]
toc: true
math: true
draft: true
---
+++
title = "Higher Inductive Types"
slug = "higher-inductive-types"
date = 2022-09-20
tags = ["type-theory"]
toc = true
math = true
draft = true
+++
**Higher inductive types (HIT)** are central to developing cubical type theory.
What this article will try to do is develop an approach to understanding higher
@ -13,7 +14,8 @@ inductive types based on my struggles to learn the topic.
<!--more-->
## Ordinary inductive types
Ordinary inductive types
---
So first off, what is an inductive type? These are a kind of data structure
that's commonly used in _functional programming_. For example, consider the
@ -40,9 +42,9 @@ representation using this data structure:
Why is this representation useful? Well, if you remember **proof by induction**
from maybe high school geometry, you'll recall that we can prove things about
all natural numbers by simply proving that it's true for the _base case_ 0, and
then proving that it's true for any _inductive case_ $n$, given that the
previous case $n - 1$ is true.
all natural numbers by proving that it's true for the _base case_ 0, and then
proving that it's true for any _inductive case_ $n$, given that the previous
case $n - 1$ is true.
This kind of definition of natural numbers makes this induction structure much
more clear. For example, look at the definition of a tree:
@ -54,11 +56,11 @@ data Tree (A : Set) : Set where
right : Tree A → Tree A
```
We can do induction on trees by simply proving it's true for (1) the base case,
(2) the left case, and (3) the right case. In fact, all inductive data
structures have this kind of induction principle. So say you wanted to prove
that $1 + 2 + 3 + \cdots + n = \frac{n\cdot(n+1)}{2}$ for all $n \in
\mathbb{N}$, then you could say:
We can do induction on trees by proving it's true for (1) the base case, (2) the
left case, and (3) the right case. In fact, all inductive data structures have
this kind of induction principle. So say you wanted to prove that $1 + 2 + 3 +
\cdots + n = \frac{n\cdot(n+1)}{2}$ for all $n \in \mathbb{N}$, then you could
say:
<details>
<summary>(click here for boring requisites)</summary>
@ -174,7 +176,8 @@ Using Agda, we can see that this type-checks correctly.
**TODO:** Ensure totality
## Higher inductive types
Higher inductive types
---
Moving on, we want to know what _higher_ inductive types brings to the table. To
illustrate its effect, let's consider the following scenario: suppose you have

View file

@ -1,12 +1,12 @@
---
title: "Dependent Types from First Principles"
slug: "dependent-types"
date: 2022-10-27
tags: ["type-theory"]
toc: true
math: true
draft: true
---
+++
title = "Dependent Types from First Principles"
slug = "dependent-types"
date = 2022-10-27
tags = ["type-theory"]
toc = true
math = true
draft = true
+++
It's frequently said that computers are just made of 1s and 0s. As programmers,
we induce structure into this sea of 1s and 0s to build a rich collection of
@ -47,8 +47,8 @@ several basic kinds of data, but without distinction, it's hard to tell them
apart. So programmers developed **types**, ways of categorizing data so when
we're just reading a series of 0s and 1s, we would know how to interpret it.
| Raw | Interpreted as number | Interpreted as text |
| ---------------- | --------------------- | ------------------- |
| `68 65 6c 6c 6f` | 448,378,203,247 | `hello` |
|Raw|Interpreted as number|Interpreted as text|
|---|---|---|
|`68 65 6c 6c 6f`|448,378,203,247|`hello`|
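A quick way to convince yourself of this table, using Python's built-ins:

```python
raw = bytes.fromhex("68 65 6c 6c 6f")

print(int.from_bytes(raw, "big"))  # 448378203247
print(raw.decode("ascii"))         # hello
```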
So now we know how to

View file

@ -0,0 +1,63 @@
+++
title = "Decentralized Identity, a Middle Ground"
date = 2022-10-30T03:04:51-05:00
logseq = true
+++
- With Twitter management changing, I'm starting to think about the small web again. The small web is a concept where you use the Internet to interact with people directly, rather than operating through the medium of megacorps. When we think about big monoliths like Twitter and Facebook, at the very heart of it is some kind of algorithm picking what it is we see. And with the power to control that gives us advertising and mass manipulation.
- For the rest of this blog post, I'll be talking about _microblogging_ specifically: short posts of several sentences, along with a few media attachments, of which Twitter is one of the largest monolithic implementations. But the things I talk about are also applicable to other applications, including long-form blogging (Medium), image sharing (Instagram, Snapchat), video sharing (YouTube), chat (Messenger)
- There's several projects out there that are already trying out other extremes. I'll cover two big types here:
- **Federation.** This style of application development is inspired by email. The idea is that you develop a common _protocol_ for apps living on different servers to talk to each other, so you can have people choose their own servers rather than trusting everything to megacorps like Google or Facebook. One notable example of this in the microblogging realm is Mastodon / ActivityPub.
- **Peer-to-peer.** This style of application development is rarer, but can be seen commonly in things like torrents. The idea is that _each_ client holds all application info, and interacts with everything else as if they were all clients. One notable example of this in the microblogging realm is Scuttlebutt.
- I'll go into more detail about these two styles later, but they both come with their advantages and drawbacks. I don't think any of the solutions that exist are sustainable for longer periods of time.
- ## Feature Evaluation
- One thing I've been doing recently when I think about software applications and system design is break things down into their **features**. This makes it a lot easier to compare applications or services to each other than a vague statement like "I've had a good/bad experience with this". So let's come up with some features of a good microblogging platform:
- Application design
- **Verifiability:** It should be easy to verify if I did indeed ever say something.
- **Freedom:** Within reason, I should not be limited in what I say or do on the platform.
- **Universality:** I should be able to communicate to anyone from anywhere (any device, any location).
- System design
- **Availability:** I want to be confident that the system will be around for many years.
- **Connectability:** I should be able to connect with people I don't already know easily.
- **Moderation:** I should be able to choose to not interact with a certain subset of users, and this process should not be painful.
- Scaling
- **User scaling:** Supporting more users should incur a relatively linear or sublinear cost, and should certainly not lead to resource consumption blowup.
- **Temporal scaling:** The system should support being run for an arbitrarily long amount of time.
- Development
- **Malleability:** How easy is it to introduce / roll out new versions and features? This may be important for updating cryptographic algorithms.
- **Version drift:** Is it possible to support users who are on different version of the service?
- Some features are certainly more important than others, and may have different importance to different people. But it gives me a base line to evaluate services, and lets me decide which features I consider to be critical.
- These are not the only features, but I like to list them in this form because while some
alternative apps may tout certain features, an app is useless if it falls short on something more important.
- ### Evaluation
- Now that we know what we're looking for, let's evaluate some existing services.
- **Mastodon**
- Satisfies:
- **Connectability:** It's easy to find other users based on their user ID, which looks like an email so it's easy to share.
- Doesn't satisfy:
- **Freedom:** While certain instances may be specialized (i.e hobby-focused), it's certainly not great when you are locked out of communicating with someone on another instance because the admins of your respective instances have beef with each other.
- **Availability:** For some reason or another, I've seen Mastodon admins give up on their instances. Running an instance is very costly in terms of money and time, so it's not surprising that if an instance owner decides it's no longer worth it, the users are pretty much hosed. While there are some options for migration like redirection, it's useless if the original server doesn't stay online.
- **Scuttlebutt**
- Satisfies:
- **Freedom:** This is the maximally free solution: your client is solely responsible for receiving and filtering all messages.
- Doesn't satisfy:
- **Connectability:** Connecting with someone must take place over an existing channel (i.e either meeting in meatspace or sharing a pub). If I don't have my Scuttlebutt client with me, then it's not possible to get updates.
- **Universality:** Since user identity is heavily tied to a single client, having multiple clients for a single user is not a well-supported workflow.
- The Matrix people have also developed [Cerulean], a playground for testing the idea of _relative reputation_, where instead of doing moderation of all users you can assign a web-of-trust-style reputation to people you follow. On the other hand, this may lead to further filter-bubbling, the implications of which I'm honestly not sure how to tackle.
[Cerulean]: https://matrix.org/blog/2020/12/18/introducing-cerulean#whats-with-the-decentralised-reputation-button
- ## Decentralized Identity
- My view for an ideal microblogging platform would be a federated server-client architecture, but where a user identity isn't tied to the server the same way a user is tied to their Mastodon instance. Essentially, you want your account to be promiscuous between several servers.
- This way, you gain all of the benefits of having a server-client architecture (easy to do multiple clients), while not relying on the server itself or its admins to stay around for a long time.
- The problem is, designing such a system securely is difficult. Ultimately, what I'm proposing is:
- A user identity model based on an asymmetric-key model. Users generate their own key locally and provide it to instances.
- Users should then have _client_ keys, which are independent of the user keys. These are signed by the user key to indicate that they belong to the same user.
- When a user wants to switch servers, they can begin requesting a checkout of their data from the server. The data can be signed by the server as well to provide an indicator that the client isn't trying to forge old history.
- One problem is if a server wants to deny a user from switching servers; they may try to keep the data hostage. One solution is to scrape all publicly obtainable data, the other is to hope that this is socially discouraged by having users avoid servers that engage in user-hostile actions.
- Users may also opt to replicate their identity entirely between two servers. This means that this synchronization process is just happening incrementally or periodically rather than just at switch-time.
- This still has a number of open problems:
- **Key revocation.** Anything involving cryptographic keys requires a revocation protocol in the event that the keys are compromised. I'm not too sure of a good way of doing this that doesn't involve an infinitely growing append-only list of revocations on all of the people who have
- When I have some free time, I may take a crack at a proof-of-concept implementation of a system like this.
- ### Further applications
- Since this scheme only really targets the identity layer with (theoretically) asymptotically constant change required for other parts of the software, this should be able to be extended to things other than just microblogging. I'm not sure about the level of success this may have for things like sharing large data files such as video (maybe mix in something like IPFS?), but there is certainly room for exploration.

View file

@ -1,9 +1,10 @@
---
title: "Rust's Impure Path"
date: 2022-10-30T12:47:41-05:00
languages: ["rust"]
tags: ["rust"]
---
+++
title = "Rust's Impure Path"
date = 2022-10-30T12:47:41-05:00
languages = ["rust"]
tags = ["rust"]
+++
I work on [garbage], a project that touches the filesystem a lot. Because of
this, I'd really like to control when and where these filesystem accesses

View file

@ -1,18 +1,17 @@
---
title: "Inductive Types"
date: 2023-03-26
tags: ["type-theory"]
math: true
draft: true
toc: true
language_switcher_languages: ["ocaml", "python"]
---
+++
title = "Inductive Types"
date = 2023-03-26
tags = ["type-theory"]
math = true
draft = true
toc = true
language_switcher_languages = ["ocaml", "python"]
+++
There's a feature common to many functional languages, the ability to have
algebraic data types. It might look something like this:
{{< language-switcher >}}
```ocaml
type bool =
| True
@ -27,15 +26,6 @@ that has two _constructors_, or ways to create this type. The constructors are
2. Any time I use the value `False`, it's understood to have type `bool`.
3. In addition, there are _no_ other ways to create values of `bool` other than combining `True` and `False` constructors.
---
```python
from typing import Literal
MyBool = Literal[True] | Literal[False]
```
{{</ language-switcher >}}
> **Note:** I'm using an experimental language switcher. It's implemented in
> pure CSS using a feature called the [`:has` pseudo-class][has]. As of writing,
> all major browsers _except_ Firefox have it implemented and enabled by default.
@ -59,23 +49,12 @@ unknown value of type `Boolean`, you know it can only take one of two values.
There's actually nothing special about boolean itself. I could just as easily
define a new type, like this:
{{< language-switcher >}}
```ocaml
type WeirdType =
| Foo
| Bar
```
---
```python
from typing import Literal
WeirdType = Literal['foo'] | Literal['bar']
```
{{</ language-switcher >}}
Because this type can only have two values, it's _semantically_ equivalent to
the `Boolean` type. I could use it anywhere I would typically use `Boolean`.
@ -94,22 +73,11 @@ You can make any _finite_ type like this: just create an algebraic data type
with unit constructors, and the result is a type with a finite cardinality. If I
wanted to make a unit type for example:
{{< language-switcher >}}
```ocaml
type unit =
| Unit
```
---
```python
from typing import Literal
Unit = Literal[None]
```
{{</ language-switcher >}}
There's only one way to ever construct something of this type, so the
cardinality of this type would be 1.
@ -123,8 +91,6 @@ called structural matching in some languages).
Let's see an example. Suppose I have a type with three values, defined like
this:
{{< language-switcher >}}
```ocaml
type direction =
| Left
@ -132,21 +98,10 @@ type direction =
| Right
```
---
```python
from typing import Literal
Direction = Literal['left'] | Literal['middle'] | Literal['right']
```
{{</ language-switcher >}}
If I was given a value with a type of direction, but I wanted to do different
things depending on exactly which direction it was, I could use _pattern
matching_ like this:
{{< language-switcher >}}
```ocaml
let do_something_with (d : direction) =
match d with
@ -155,25 +110,6 @@ let do_something_with (d : direction) =
| Right -> do_this_if_right
```
---
```python
def do_something_with(d : Direction) -> str:
match inp:
case 'left': return do_this_if_left
case 'middle': return do_this_if_middle
case 'right': return do_this_if_right
case _: assert_never(inp)
```
**Note:** the `assert_never` is a static check for exhaustiveness. If we missed
a single one of the cases, a static type checker like [pyright] could catch it
and tell us which of the remaining cases there are.
[pyright]: https://github.com/microsoft/pyright
{{</ language-switcher >}}
This gives me a way to discriminate between the different variants of
`direction`.
@ -203,16 +139,12 @@ contain themselves as a type).
You can see an example of this here:
{{< language-switcher >}}
```ocaml
type nat =
| Suc of nat
| Zero
```
{{</ language-switcher >}}
These are the natural numbers, which are defined inductively. Each number is
just represented by a data type that wraps 0 that number of times. So 3 would be
`Suc (Suc (Suc Zero))`.
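Since this post's other examples come with Python renditions, here is a rough Python analogue of the same inductive definition (my own sketch, not from the post), including the `is_even` check discussed next:

```python
from dataclasses import dataclass

@dataclass
class Zero:
    pass

@dataclass
class Suc:
    pred: "Nat"

Nat = Zero | Suc

three: Nat = Suc(Suc(Suc(Zero())))

def is_even(n: Nat) -> bool:
    # structural recursion: peel off one Suc at a time
    match n:
        case Zero():
            return True
        case Suc(pred):
            return not is_even(pred)

print(is_even(three))  # False
```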
@ -235,19 +167,18 @@ let rec is_even = fun (n : nat) ->
<details>
<summary>Try it for yourself</summary>
If you've got an OCaml interpreter handy, try a couple values for yourself and
convince yourself that this accurately represents the naturals and an even
testing function:
```ocaml
utop # is_even Zero;;
- : bool = true
utop # is_even (Suc Zero);;
- : bool = false
```
This is a good way of making sure the functions you write make sense!
</details>
## Induction principle
@ -256,11 +187,11 @@ Let's express this in the language of mathematical induction. If I have any
natural number:
- If the natural number is the base case of zero, then the `is_even` relation
automatically evaluates to true.
- If the natural number is a successor, invert the induction hypothesis (which is
what `is_even` evaluates to for the previous step, a.k.a whether or not the
previous natural number is even), since every even number is succeeded by
an odd number and vice versa.
Once these rules are followed, by induction we know that `is_even` can run on
any natural number ever. In code, this looks like:
@ -303,6 +234,8 @@ like this:
let convert_nat = nat_transformer 0 (fun x -> x + 1)
```
TODO: Talk about https://counterexamples.org/currys-paradox.html
[has]: https://developer.mozilla.org/en-US/docs/Web/CSS/:has

View file

@ -1,8 +1,8 @@
---
title: "Getting a shell in a Docker Compose container without any shells"
date: 2023-03-29
tags: ["docker", "linux"]
---
+++
title = "Getting a shell in a Docker Compose container without any shells"
date = 2023-03-29
tags = ["docker", "linux"]
+++
First (published) blog post of the year! :raising_hands:
@ -12,7 +12,6 @@ container without exiting it. Some Docker containers are incredibly stripped
down to optimize away bloat (which is good!) but this may make debugging them
relatively annoying.
> [!admonition: NOTE]
> These are my specific steps for running it; please replace the paths and
> container names with the ones relevant to your specific use-case.

View file

@ -1,8 +1,8 @@
---
title: "Developing on projects without flake.nix on NixOS"
date: 2023-04-20
tags: ["linux", "nixos"]
---
+++
title = "Developing on projects without flake.nix on NixOS"
date = 2023-04-20
tags = ["linux", "nixos"]
+++
Ever since I became a NixOS hobbyist a few years ago, it's easy to plug NixOS
wherever I go. The way flakes create a reproducible development environment
@ -50,8 +50,8 @@ kind of existing configuration, I have _no_ dependencies, and if I want to even
build it, I would have to write a flake myself. That's fine and all, but
typically the `flake.nix` file lives in the root directory of the project, and
it's good practice in the Nix world to commit the flake file along with its
corresponding lock file. However, the upstream project may not appreciate it if
I shove new config files in their root directory.
corresponding lock file. The upstream project may not appreciate me shoving new
config files in their root directory.
```
  project
@ -61,7 +61,6 @@ I shove new config files in their root directory.
 flake.nix ✗
```
> [!admonition: NOTE]
> The `✗` indicates that I added the file to the project, and it hasn't been
> committed to the repo yet.
@ -131,10 +130,9 @@ project structure should look a bit more like this:
 flake.nix
```
> [!admonition: NOTE]
> Remember, since you moved the `.envrc` file, you will need to run `direnv allow`
> again. Depending on how you moved it, you might also need to change the path you
> wrote in the `use flake` command.
> Remember, since you moved the `.envrc` file, you will need to run `direnv
> allow` again. Depending on how you moved it, you might also need to change the
> path you wrote in the `use flake` command.
With this setup, the `project` directory can contain a clean clone of upstream
and your flake files will create the appropriate environment.

View file

@ -1,10 +1,10 @@
---
title: "Programmable Proofs"
date: 2023-04-20
tags: ["type-theory"]
math: true
draft: true
---
+++
title = "Programmable Proofs"
date = 2023-04-20
tags = ["type-theory"]
math = true
draft = true
+++
Today I'm going to convince you that data types in programming are the same as
[_propositions_][proposition] in the logical sense. Using this, we can build an
@ -37,15 +37,13 @@ Simple enough. `Cake` is a type now. What does being a type mean?
- We can use it in function definitions, like this:
```ts
function giveMeCake(cake: Cake) {
/* ... */
}
function giveMeCake(cake: Cake) { ... }
```
- We can put it in other types, like this:
```ts
type Party = { cake: Cake; balloons: Balloon[] /* ... */ };
type Party = { cake: Cake, balloons: Balloon[], ... };
```
This creates a `Party` object that contains a `Cake`, some `Balloon`s, and
@ -82,7 +80,7 @@ function getUsername(id: number): string {
if (!isIdValid(id)) throw new Error("Invalid ID");
// Ok, now we are sure that id is valid.
// ...
...
}
```
@ -94,7 +92,7 @@ around numbers and doing the check every time.
function getUsername(id: UserId): string {
// We can just skip the first part entirely now!
// Because of the type of the input, we are sure that id is valid.
// ...
...
}
```
@ -166,7 +164,7 @@ object, it's going to be valid.
function getUsername(id: UserId): string {
// We can just skip the first part entirely now!
// Because of the type of the input, we are sure that id is valid.
// ...
...
}
```
@ -199,10 +197,8 @@ But what about our use cases?
this:
```ts
function giveMeCake(cake: NoCake) {
/* ... */
}
type Party = { cake: NoCake; balloons: Balloon[] /* ... */ };
function giveMeCake(cake: NoCake) { ... }
type Party = { cake: NoCake, balloons: Balloon[], ... };
```
Interestingly, this actually still typechecks! Why is that?
@ -310,9 +306,9 @@ parties of logical thought that are involved here. Let's meet them:
sort. Another notable difference is that intuitionistic does not consider the
Law of Excluded Middle to be true.
However, intuitionistic logic is preferred in a lot of mechanized theorem
provers, because it's easier computationally to talk about "having a witness".
Let's actually look more closely at what it means to have a "witness".
Between the two, intuitionistic logic is preferred in a lot of mechanized
theorem provers, because it's easier computationally to talk about "having a
witness". Let's actually look more closely at what it means to have a "witness".
If you didn't read the section earlier about using private constructors to
restrict type construction, I'm going to revisit the idea a bit here. Consider
@ -320,16 +316,11 @@ this very restrictive hash map:
```ts
namespace HashMap {
class Ticket {
/* ... */
} // Private to namespace
class Ticket { ... } // Private to namespace
export class HashMap {
// ...
public insert(k: string, v: number): Ticket {
/* ... */
}
...
public insert(k: string, v: number): Ticket { ... }
public getWithoutTicket(k: string): number | null {
if (!this.hasKey(k)) return null;
@ -344,8 +335,8 @@ namespace HashMap {
```
The idea behind this is kind of like having a witness. Whenever you insert
something into the map, you get back a ticket that basically proves that it was
the one that inserted it for you in the first place.
something into the map, you get back a ticket that proves that it was the one
that inserted it for you in the first place.
Since we used a namespace to hide access to the `Ticket` class, the only way you
could have constructed a ticket is by inserting into the map. The ticket then
@ -377,8 +368,8 @@ function returnUnit(): null {
```
There's nothing else that fits in this type. (I realize `void` and `undefined`
also exist and are basically single-valued types, but they have other weird
behavior that makes me not want to lump them in here)
also exist and are single-valued types, but they have other weird behavior that
makes me not want to lump them in here)
Let's just also jump into two-valued types, which in math we just call
$\mathbf{2}$. This type has a more common analog in programming, it's the
@ -403,12 +394,8 @@ class First {
static instance: First = new First();
private constructor() {}
}
class Second {
/* ... */
} // same thing
class Third {
/* ... */
} // same thing
class Second { ... } // same thing
class Third { ... } // same thing
```
The reason I slapped on a private constructor and included a singleton

View file

@ -1,5 +1,5 @@
---
title: "Formally proving true ≢ false in Homotopy Type Theory with Agda"
title: "Proving true ≢ false"
slug: "proving-true-from-false"
date: 2023-04-21
tags: ["type-theory", "agda"]
@ -12,9 +12,20 @@ math: true
These are some imports that are required for code on this page to work properly.
```agda
open import Prelude
```
{-# OPTIONS --cubical #-}
open import Cubical.Foundations.Prelude
open import Data.Bool
open import Data.Unit
open import Data.Empty
¬_ : Set → Set
¬ A = A → ⊥
infix 4 _≢_
_≢_ : ∀ {A : Set} → A → A → Set
x ≢ y = ¬ (x ≡ y)
```
</details>
The other day, I was trying to prove `true ≢ false` in Agda. I would write the
@ -28,6 +39,7 @@ For many "obvious" statements, it suffices to just write `refl` since the two
sides are trivially true via rewriting. For example:
```
open import Data.Nat
1+2≡3 : 1 + 2 ≡ 3
1+2≡3 = refl
```
@ -48,7 +60,7 @@ left side so it becomes judgmentally equal to the right:
- suc (suc (suc zero))
- 3
However, in Agda, naively using `refl` with the inverse statement doesn't work.
In cubical Agda, naively using `refl` with the inverse statement doesn't work.
I've commented it out so the code on this page can continue to compile.
```
@ -58,7 +70,7 @@ I've commented it out so the code on this page can continue to compile.
It looks like it's not obvious to the interpreter that this statement is
actually true. Why is that?
## Intuition
---
Well, in constructive logic / constructive type theory, proving something is
false is actually a bit different. You see, the definition of the `not`
@ -82,7 +94,7 @@ The strategy here is we define some kind of "type-map". Every time we see
`false`, we'll map it to empty.
```
bool-map : Bool → Set
bool-map : Bool → Type
bool-map true = ⊤
bool-map false = ⊥
```
@ -92,69 +104,43 @@ over a path (the path supposedly given to us as the witness that true ≢ false)
will produce a function from the inhabited type we chose to the empty type!
```
true≢false p = transport bool-map p tt
true≢false p = transport (λ i → bool-map (p i)) tt
```
I used `true` here, but I could equally have just used anything else:
```
bool-map2 : Bool → Set
bool-map2 : Bool → Type
bool-map2 true = 1 ≡ 1
bool-map2 false = ⊥
true≢false2 : true ≢ false
true≢false2 p = transport bool-map2 p refl
true≢false2 p = transport (λ i → bool-map2 (p i)) refl
```
## Note on proving divergence on equivalent values
> [!admonition: NOTE]
> Update: some of these have been commented out since regular Agda doesn't support higher inductive types
---
Let's make sure this isn't broken by trying to apply this to something that's
actually true, like this higher inductive type:
actually true:
```text
data NotBool : Type where
true1 : NotBool
true2 : NotBool
same : true1 ≡ true2
```
2-map : ℕ → Type
2-map 2 =
2-map 2 = ⊥
2-map else =
-- 2≢2 : 2 ≢ 2
-- 2≢2 p = transport (λ i → 2-map (p i)) tt
```
In this data type, we have a path over `true1` and `true2` that is a part of the
definition of the `NotBool` type. Since this is an intrinsic equality, we can't
map `true1` and `true2` to divergent types. Let's see what happens:
I commented the lines out because they don't compile, but if you tried to
compile it, it would fail with:
```text
notbool-map : NotBool → Type
notbool-map true1 =
notbool-map true2 =
!=<
when checking that the expression transport (λ i → 2-map (p i)) tt
has type
```
Ok, we've defined the same thing that we did before, but Agda gives us this
message:
```text
Errors:
Incomplete pattern matching for notbool-map. Missing cases:
notbool-map (same i)
when checking the definition of notbool-map
```
Agda helpfully notes that we still have another case in the inductive type to
pattern match on. Let's just go ahead and give it some value:
```text
notbool-map (same i) =
```
If you give it `⊤`, it will complain that `⊥ != ⊤ of type Type`, but if you give
it `⊥`, it will also complain! Because pattern matching on higher inductive
types requires a functor over the path, we must provide a function that
satisfies the equality `notbool-map true1 ≡ notbool-map true2`, which is not
possible unless we give both constructors the same type.
So in the end, this type `NotBool → Type` is only possible to write if the two
types we mapped `true1` and `true2` can be proven equivalent. But this also
means we can't use it to prove `true1 ≢ true2`, which is exactly the property we
wanted to begin with.
That's because with identical terms, you can't simultaneously assign them to
different values, or else it would not be a proper function.

3
content/posts/posts.json Normal file
View file

@ -0,0 +1,3 @@
{
"layout": "post"
}

View file

@ -0,0 +1,11 @@
+++
title = "Projects"
type = "projects"
layout = "single"
+++
# Projects
This is a curated list of projects I do outside of work for fun. Find the full list on my [Gitea profile][1].
[1]: https://git.mzhang.io/michael

26
eleventy.config.js Normal file
View file

@ -0,0 +1,26 @@
const util = require("util");
const inclusiveLangPlugin = require("@11ty/eleventy-plugin-inclusive-language");
const directoryOutputPlugin = require("@11ty/eleventy-plugin-directory-output");
const eleventySass = require("eleventy-sass");
module.exports = (config) => {
config.setQuietMode(true);
config.addPlugin(directoryOutputPlugin);
config.addPlugin(eleventySass);
config.addPlugin(inclusiveLangPlugin, {
words:
"simply,obviously,basically,of course,clearly,everyone knows,however",
});
config.addFilter("debug", (content) => `<pre>${util.inspect(content)}</pre>`);
return {
dir: {
input: "content",
includes: "../includes",
layouts: "../layouts",
output: "_site",
},
};
};

View file

@ -1,52 +1,15 @@
{
"nodes": {
"agda": {
"inputs": {
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1719426791,
"narHash": "sha256-jiPGzJM+XMwb1OwyxpbY6jV0Lzr5yLKnPSMZbueFNRM=",
"owner": "agda",
"repo": "agda",
"rev": "777b07f573c7526d677c18c25e44b24a849bc03b",
"type": "github"
},
"original": {
"owner": "agda",
"repo": "agda",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": "nixpkgs-lib"
},
"locked": {
"lastModified": 1701473968,
"narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "flake-parts",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"lastModified": 1681202837,
"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
"type": "github"
},
"original": {
@ -56,45 +19,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1702346276,
"narHash": "sha256-eAQgwIWApFQ40ipeOjVSoK4TEHVd6nbSd9fApiHIw5A=",
"owner": "NixOS",
"lastModified": 1663551060,
"narHash": "sha256-e2SR4cVx9p7aW/XnVsGsWZBplApA9ZJUjc0fejJhnYo=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-23.11",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-lib": {
"locked": {
"dir": "lib",
"lastModified": 1701253981,
"narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58",
"type": "github"
},
"original": {
"dir": "lib",
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1718089647,
"narHash": "sha256-COO4Xk2EzlZ3x9KCiJildlAA6cYDSPlnY8ms7pKl2Iw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "f7207adcc68d9cafa29e3cd252a18743ae512c6a",
"rev": "8a5b9ee7b7a2b38267c9481f5c629c015108ab0d",
"type": "github"
},
"original": {
@ -104,9 +33,8 @@
},
"root": {
"inputs": {
"agda": "agda",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs_2"
"nixpkgs": "nixpkgs"
}
},
"systems": {

View file

@ -1,34 +1,8 @@
{
inputs.agda.url = "github:agda/agda";
outputs = { self, nixpkgs, flake-utils, agda, }:
description = "A very basic flake";
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
agda-pkg = agda.packages.x86_64-linux.default;
flakePkgs = rec {
agda-bin = pkgs.callPackage ./nix/agda-bin.nix { inherit agda-pkg; };
docker-builder =
pkgs.callPackage ./nix/docker-builder.nix { inherit agda-bin; };
};
in {
packages = flake-utils.lib.flattenTree flakePkgs;
devShell = pkgs.mkShell {
ASTRO_TELEMETRY_DISABLED = 1;
packages = with pkgs;
with flakePkgs; [
bun
woodpecker-cli
nixfmt-rfc-style
dive
nix-tree
vips
shellcheck
agda-bin
nodejs_20
corepack
];
};
});
let pkgs = import nixpkgs { inherit system; };
in { devShell = pkgs.mkShell { packages = with pkgs; [ hugo ]; }; });
}

38
layouts/base.html Normal file
View file

@ -0,0 +1,38 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Michael Zhang</title>
<link rel="stylesheet" href="/main.css" />
</head>
<body>
<div class="flex-wrapper">
<div class="sep"></div>
<div class="container"></div>
</div>
<footer>
<p>
Blog code licensed under
<a href="https://www.gnu.org/licenses/gpl-3.0.txt" target="_blank"
>[GPL-3.0]</a
>. Post contents licensed under
<a href="https://creativecommons.org/licenses/by-sa/4.0/legalcode.txt"
>[CC BY-SA 4.0]</a
>.
<br />
Written by Michael Zhang.
<a
href="https://git.mzhang.io/michael/blog"
class="colorlink"
target="_blank"
>[Source]</a
>.
</p>
</footer>
</body>
</html>

5
layouts/post.html Normal file
View file

@ -0,0 +1,5 @@
---
layout: base
---
<h1>{{ title }}</h1>

View file

@ -1,23 +0,0 @@
{ agda-pkg, runCommand, writeShellScriptBin, writeTextFile, agdaPackages }:
let
libraryFile =
with agdaPackages;
writeTextFile {
name = "agda-libraries";
text = ''
${agdaPackages.cubical}/cubical.agda-lib
${agdaPackages.standard-library}/standard-library.agda-lib
'';
};
# Add an extra layer of indirection here to prevent all of GHC from being pulled in
wtf = runCommand "agda-bin" { } ''
cp ${agda-pkg}/bin/agda $out
'';
in
writeShellScriptBin "agda" ''
set -euo pipefail
exec ${wtf} --library-file=${libraryFile} $@
''

View file

@ -1,43 +0,0 @@
{ dockerTools
, agda-bin
, corepack
, rsync
, openssh
, bash
, coreutils
, nodejs_20
, gnused
, pkgsLinux
}:
dockerTools.buildLayeredImage {
name = "blog-docker-builder";
contents = with dockerTools; [
agda-bin
corepack
rsync
openssh
bash
coreutils
nodejs_20
gnused
usrBinEnv
caCertificates
fakeNss
];
# fakeRootCommands = ''
# #!${pkgsLinux.runtimeShell}
# ${pkgsLinux.dockerTools.shadowSetup}
# groupadd -r builder
# useradd -r -g builder builder
# '';
}
# copyToRoot = with dockerTools; buildEnv {
# name = "blog-docker-builder-image-root";
# paths = [
# ];
# };

2440
package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,55 +1,10 @@
{
"name": "blog",
"type": "module",
"version": "0.0.1",
"packageManager": "pnpm@9.4.0+sha256.b6fd0bfda555e7e584ad7e56b30c68b01d5a04f9ee93989f4b93ca8473c49c74",
"scripts": {
"dev": "astro dev",
"start": "astro dev",
"build": "astro build",
"preview": "astro preview",
"astro": "astro",
"format": "prettier -w ."
},
"dependencies": {
"@astrojs/markdoc": "^0.11.1",
"@astrojs/markdown-remark": "^5.1.1",
"@astrojs/mdx": "^1.1.5",
"@astrojs/rss": "^3.0.0",
"@astrojs/sitemap": "^3.1.6",
"@justfork/rehype-autolink-headings": "^5.1.1",
"astro": "^3.6.5",
"astro-imagetools": "^0.9.0",
"astro-remark-description": "^1.1.2",
"classnames": "^2.5.1",
"fork-awesome": "^1.2.0",
"katex": "^0.16.10",
"lodash-es": "^4.17.21",
"mdast-util-to-string": "^4.0.0",
"nanoid": "^4.0.2",
"reading-time": "^1.5.0",
"rehype-accessible-emojis": "^0.3.2",
"rehype-katex": "^6.0.3",
"remark-emoji": "^4.0.1",
"remark-github-beta-blockquote-admonitions": "^2.2.1",
"remark-math": "^5.1.1",
"remark-parse": "^10.0.2",
"sharp": "^0.33.4"
},
"devDependencies": {
"@biomejs/biome": "^1.8.2",
"@types/lodash-es": "^4.17.12",
"date-fns": "^2.30.0",
"hast-util-from-html": "^2.0.1",
"hast-util-to-html": "^9.0.1",
"mdast": "^3.0.0",
"mdast-util-from-markdown": "^2.0.1",
"prettier": "^3.3.2",
"prettier-plugin-astro": "^0.12.3",
"rehype-slug": "^6.0.0",
"sass": "^1.77.6",
"shiki": "^0.14.7",
"unified": "^11.0.5",
"unist-util-visit": "^5.0.0"
"@11ty/eleventy": "^2.0.1",
"@11ty/eleventy-plugin-directory-output": "^1.0.1",
"@11ty/eleventy-plugin-inclusive-language": "^1.0.3",
"eleventy-sass": "^2.2.1",
"sass": "^1.62.1",
"typescript": "^5.0.4"
}
}

View file

@ -1,157 +0,0 @@
// https://github.com/myl7/remark-github-beta-blockquote-admonitions
// License: Apache-2.0
import { visit } from "unist-util-visit";
import type { Data } from "unist";
import type { BuildVisitor } from "unist-util-visit";
import type { Blockquote, Paragraph, Text } from "mdast";
import type { RemarkPlugin } from "@astrojs/markdown-remark";
import classNames from "classnames";
const remarkAdmonitions: RemarkPlugin =
(providedConfig?: Partial<Config>) => (tree) => {
visit(tree, handleNode({ ...defaultConfig, ...providedConfig }));
};
export default remarkAdmonitions;
const handleNode =
(config: Config): BuildVisitor =>
(node) => {
// Filter required elems
if (node.type !== "blockquote") return;
const blockquote = node as Blockquote;
if (blockquote.children[0]?.type !== "paragraph") return;
const paragraph = blockquote.children[0];
if (paragraph.children[0]?.type !== "text") return;
const text = paragraph.children[0];
// A link break after the title is explicitly required by GitHub
const titleEnd = text.value.indexOf("\n");
if (titleEnd < 0) return;
const textBody = text.value.substring(titleEnd + 1);
let title = text.value.substring(0, titleEnd);
// Handle whitespaces after the title.
// Whitespace characters are defined by GFM
const m = /[ \t\v\f\r]+$/.exec(title);
if (m && !config.titleKeepTrailingWhitespaces) {
title = title.substring(0, title.length - m[0].length);
}
if (!nameFilter(config.titleFilter)(title)) return;
const { displayTitle, checkedTitle } = config.titleTextMap(title);
// Update the text body
text.value = textBody;
// Insert the title element and add classes for the title
const paragraphTitleText: Text = { type: "text", value: displayTitle };
const paragraphTitle: Paragraph = {
type: "paragraph",
children: [paragraphTitleText],
data: config.dataMaps.title({
hProperties: {
className: classNameMap(config.classNameMaps.title)(checkedTitle),
},
}),
};
blockquote.children.unshift(paragraphTitle);
// Add classes for the block
blockquote.data = config.dataMaps.block({
...blockquote.data,
hProperties: {
className: classNameMap(config.classNameMaps.block)(checkedTitle),
},
// The blockquote should be rendered as a div, which is explicitly required by GitHub
hName: "div",
});
};
const TITLE_PATTERN =
/\[\!admonition: (attention|caution|danger|error|hint|important|note|tip|warning)\]/i;
export const mkdocsConfig: Partial<Config> = {
classNameMaps: {
block: (title) => [
"admonition",
...(title.startsWith("admonition: ")
? title.substring("admonition: ".length)
: title
).split(" "),
],
title: classNames("admonition-title"),
},
titleFilter: (title) => Boolean(title.match(TITLE_PATTERN)),
titleTextMap: (title: string) => {
console.log("title", title);
const match = title.match(TITLE_PATTERN);
console.log("matches", match);
const displayTitle = match?.[1] ?? "";
const checkedTitle = displayTitle;
return { displayTitle, checkedTitle };
},
};
export interface Config {
classNameMaps: {
block: ClassNameMap;
title: ClassNameMap;
};
titleFilter: NameFilter;
titleTextMap: (title: string) => {
displayTitle: string;
checkedTitle: string;
};
dataMaps: {
block: (data: Data) => Data;
title: (data: Data) => Data;
};
titleKeepTrailingWhitespaces: boolean;
legacyTitle: boolean;
}
export const defaultConfig: Config = {
classNameMaps: {
block: "admonition",
title: "admonition-title",
},
titleFilter: ["[!NOTE]", "[!IMPORTANT]", "[!WARNING]"],
titleTextMap: (title) => ({
displayTitle: title.substring(2, title.length - 1),
checkedTitle: title.substring(2, title.length - 1),
}),
dataMaps: {
block: (data) => data,
title: (data) => data,
},
titleKeepTrailingWhitespaces: false,
legacyTitle: false,
};
type ClassNames = string | string[];
type ClassNameMap = ClassNames | ((title: string) => ClassNames);
export function classNameMap(gen: ClassNameMap) {
return (title: string) => {
const classNames = typeof gen === "function" ? gen(title) : gen;
return typeof classNames === "object" ? classNames.join(" ") : classNames;
};
}
type NameFilter = ((title: string) => boolean) | string[];
export function nameFilter(filter: NameFilter) {
return (title: string) => {
return typeof filter === "function"
? filter(title)
: filter.includes(title);
};
}

View file

@ -1,156 +0,0 @@
import type { RootContent } from "hast";
import { fromMarkdown } from "mdast-util-from-markdown";
import { fromHtml } from "hast-util-from-html";
import { toHtml } from "hast-util-to-html";
import { spawnSync } from "node:child_process";
import {
mkdirSync,
mkdtempSync,
readFileSync,
existsSync,
readdirSync,
copyFileSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join, parse } from "node:path";
import { visit } from "unist-util-visit";
import type { RemarkPlugin } from "@astrojs/markdown-remark";
interface Options {
base: string;
outDir: string;
publicDir: string;
}
const remarkAgda: RemarkPlugin = ({ base, publicDir }: Options) => {
const destDir = join(publicDir, "generated", "agda");
mkdirSync(destDir, { recursive: true });
return (tree, { history }) => {
const path: string = history[history.length - 1]!;
if (!(path.endsWith(".lagda.md") || path.endsWith(".agda"))) return;
console.log("AGDA:processing path", path);
const tempDir = mkdtempSync(join(tmpdir(), "agdaRender."));
const agdaOutDir = join(tempDir, "output");
const agdaOutFilename = parse(path).base.replace(/\.lagda.md/, ".md");
const agdaOutFile = join(agdaOutDir, agdaOutFilename);
mkdirSync(agdaOutDir, { recursive: true });
const childOutput = spawnSync(
"agda",
[
"--html",
`--html-dir=${agdaOutDir}`,
"--highlight-occurrences",
"--html-highlight=code",
path,
],
{},
);
if (childOutput.error || !existsSync(agdaOutFile)) {
throw new Error(
`Agda error:
Stdout:
${childOutput.stdout}
Stderr:
${childOutput.stderr}`,
{
cause: childOutput.error,
},
);
}
// // TODO: Handle child output
// console.error("--AGDA OUTPUT--");
// console.error(childOutput);
// console.error(childOutput.stdout?.toString());
// console.error(childOutput.stderr?.toString());
// console.error("--AGDA OUTPUT--");
const referencedFiles = new Set();
for (const file of readdirSync(agdaOutDir)) {
referencedFiles.add(file);
const fullPath = join(agdaOutDir, file);
const fullDestPath = join(destDir, file);
if (file.endsWith(".html")) {
const src = readFileSync(fullPath);
writeFileSync(
fullDestPath,
`
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="${base}generated/agda/Agda.css" />
</head>
<body>
<pre class="Agda">
${src}
</pre>
</body>
</html>
`,
);
} else {
copyFileSync(fullPath, fullDestPath);
}
}
const htmlname = parse(path).base.replace(/\.lagda.md/, ".html");
const doc = readFileSync(agdaOutFile);
// This is the post-processed markdown with HTML code blocks replacing the Agda code blocks
const tree2 = fromMarkdown(doc);
const collectedCodeBlocks: RootContent[] = [];
visit(tree2, "html", (node) => {
const html = fromHtml(node.value, { fragment: true });
const firstChild: RootContent = html.children[0]!;
visit(html, "element", (node) => {
if (node.tagName !== "a") return;
if (node.properties.href) {
// Trim off end
const [href, hash, ...rest] = node.properties.href.split("#");
if (rest.length > 0) throw new Error("come look at this");
if (href === htmlname) node.properties.href = `#${hash}`;
if (referencedFiles.has(href)) {
node.properties.href = `${base}generated/agda/${href}${hash ? `#${hash}` : ""}`;
node.properties.target = "_blank";
}
}
});
if (!firstChild?.properties?.className?.includes("Agda")) return;
const stringContents = toHtml(firstChild);
collectedCodeBlocks.push({
contents: stringContents,
});
});
let idx = 0;
visit(tree, "code", (node) => {
if (!(node.lang === null || node.lang === "agda")) return;
node.type = "html";
node.value = collectedCodeBlocks[idx].contents;
idx += 1;
});
};
};
export default remarkAgda;

View file

@ -1,16 +0,0 @@
import getReadingTime from "reading-time";
import { toString } from "mdast-util-to-string";
import type { RemarkPlugin } from "@astrojs/markdown-remark";
const remarkReadingTime: RemarkPlugin = () => {
return (tree, { data }) => {
const textOnPage = toString(tree);
const readingTime = getReadingTime(textOnPage);
// readingTime.text will give us minutes read as a friendly string,
// i.e. "3 min read"
data.astro.frontmatter.minutesRead = readingTime.text;
};
};
export default remarkReadingTime;

View file

@ -1,42 +0,0 @@
import type { RemarkPlugin } from "@astrojs/markdown-remark";
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
import { visit } from "unist-util-visit";
import { join } from "node:path";
import { spawnSync } from "node:child_process";
import { tmpdir } from "node:os";
const remarkTypst: RemarkPlugin = () => {
const tmp = mkdtempSync(join(tmpdir(), "typst"));
let ctr = 0;
return (tree) => {
visit(
tree,
(node) => node.type === "code" && node.lang === "typst",
(node, index, parent) => {
const doc = join(tmp, `${ctr}.typ`);
const docOut = join(tmp, `${ctr}.svg`);
ctr += 1;
writeFileSync(doc, node.value);
const result = spawnSync(
"typst",
[
"compile",
"--format",
"svg",
doc,
],
{},
);
console.log("OUTPUT", result.stderr.toString());
const svgOut = readFileSync(docOut);
node.type = "html";
node.value = svgOut.toString();
},
);
};
};
export default remarkTypst;

File diff suppressed because it is too large

View file

@ -1 +0,0 @@
did:plc:zbg2asfcpyughqspwjjgyc2d

View file

@ -1,2 +0,0 @@
User-agent: *
Allow: /

View file

@ -1,13 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
nix build .#docker-builder
IMAGE_NAME=$(docker load -q -i ./result | cut -d':' -f2,3 | xargs)
REMOTE_IMAGE_NAME="git.mzhang.io/michael/$IMAGE_NAME"
docker image tag "$IMAGE_NAME" "$REMOTE_IMAGE_NAME"
sed -i -E "s~(.*image: ).*blog-docker-builder:?.*~\1$REMOTE_IMAGE_NAME~" .woodpecker.yml
echo "Created $REMOTE_IMAGE_NAME"
docker push -q "$REMOTE_IMAGE_NAME"

View file

@ -1,95 +0,0 @@
{-# OPTIONS --cubical-compatible #-}
module Prelude where

open import Agda.Primitive

private
  variable
    l : Level

data ⊥ : Set where

rec-⊥ : {A : Set} → ⊥ → A
rec-⊥ ()

¬_ : Set → Set
¬ A = A → ⊥

data ⊤ : Set where
  tt : ⊤

data Bool : Set where
  true : Bool
  false : Bool

id : {l : Level} → {A : Set l} → A → A
id x = x

module Nat where
  data ℕ : Set where
    zero : ℕ
    suc : ℕ → ℕ
  {-# BUILTIN NATURAL ℕ #-}

  infixl 6 _+_
  _+_ : ℕ → ℕ → ℕ
  zero + n = n
  suc m + n = suc (m + n)
open Nat public

infix 4 _≡_
data _≡_ {l} {A : Set l} : (a b : A) → Set l where
  instance refl : {x : A} → x ≡ x
{-# BUILTIN EQUALITY _≡_ #-}

ap : {A B : Set l} (f : A → B) {x y : A} → x ≡ y → f x ≡ f y
ap f refl = refl

sym : {A : Set l} {x y : A} → x ≡ y → y ≡ x
sym refl = refl

trans : {A : Set l} {x y z : A} → x ≡ y → y ≡ z → x ≡ z
trans refl refl = refl

infixl 10 _∙_
_∙_ = trans

transport : {l₁ l₂ : Level} {A : Set l₁} {x y : A}
  → (P : A → Set l₂)
  → (p : x ≡ y)
  → P x → P y
transport {l₁} {l₂} {A} {x} {y} P refl = id

infix 4 _≢_
_≢_ : {A : Set} → A → A → Set
x ≢ y = ¬ (x ≡ y)

module dependent-product where
  infixr 4 _,_
  infixr 2 _×_

  record Σ {l₁ l₂} (A : Set l₁) (B : A → Set l₂) : Set (l₁ ⊔ l₂) where
    constructor _,_
    field
      fst : A
      snd : B fst
  open Σ
  {-# BUILTIN SIGMA Σ #-}

  syntax Σ A (λ x → B) = Σ[ x ∈ A ] B

  _×_ : {l : Level} → (A B : Set l) → Set l
  _×_ A B = Σ A (λ _ → B)
open dependent-product public

_∘_ : {A B C : Set} → (g : B → C) → (f : A → B) → A → C
(g ∘ f) a = g (f a)

_∼_ : {A B : Set} → (f g : A → B) → Set
_∼_ {A} f g = (x : A) → f x ≡ g x

postulate
  funExt : {l : Level} {A B : Set l}
    → {f g : A → B}
    → ((x : A) → f x ≡ g x)
    → f ≡ g

View file

@ -1,16 +0,0 @@
---
import "../styles/footer.scss";
---
<footer>
<p>
Blog code licensed under <a href="https://www.gnu.org/licenses/gpl-3.0.txt" target="_blank"
>GPL-3.0</a
>. Post contents licensed under <a
href="https://creativecommons.org/licenses/by-sa/4.0/legalcode.txt">CC BY-SA 4.0</a
>.
<br />
Written by Michael Zhang.
<a href="https://git.mzhang.io/michael/blog" class="colorlink" target="_blank">Source</a>.
</p>
</footer>

View file

@ -1,7 +0,0 @@
---
interface Props {}
const {} = Astro.props;
console.log("SHIET", Astro.props);
console.log("SHIET", await Astro.slots.render("default"));
---

View file

@ -1,34 +0,0 @@
---
import "../styles/leftNav.scss";
import { Content as ShortBio } from "../content/partials/shortBio.md";
import links from "../data/links";
import { Image } from "astro:assets";
import portrait from "../assets/self.png";
---
<nav class="side-nav">
<div class="side-nav-content">
<a href="/" class="portrait">
<Image src={portrait} alt="portrait" class="portrait" />
</a>
<div class="me">
<h1 class="title">Michael Zhang</h1>
<div class="links">
{
links.map((link) => {
return (
<a href={link.url} title={link.description}>
{link.name}
</a>
);
})
}
</div>
</div>
<div class="bio">
<ShortBio />
<a href="/about">More &raquo;</a>
</div>
</div>
</nav>

View file

@ -1,130 +0,0 @@
---
import { getCollection, type CollectionEntry } from "astro:content";
import Timestamp from "./Timestamp.astro";
import { sortBy } from "lodash-es";
import TagList from "./TagList.astro";
interface Props {
basePath: string;
/** Whether or not to include drafts in this list */
drafts?: "exclude" | "include" | "only";
filteredPosts?: Post[];
class?: string | undefined;
timeFormat?: string | undefined;
collection?: string;
}
type Post = CollectionEntry<"posts">;
const {
class: className,
basePath,
drafts: includeDrafts,
filteredPosts,
timeFormat,
collection,
} = Astro.props;
type FilterFn = (_: Post) => boolean;
function unreachable(): never {
throw new Error("unreachable");
}
function getFilter(): FilterFn {
switch (includeDrafts) {
case "exclude":
case undefined:
return (post: Post) => !post.data.draft;
case "include":
return (_: Post) => true;
case "only":
return (post: Post) => post.data.draft === true;
}
return unreachable();
}
const filter = getFilter();
let allPosts;
if (filteredPosts) allPosts = filteredPosts.filter(filter);
else allPosts = await getCollection(collection ?? "posts", filter);
const sortedPosts = sortBy(allPosts, (post) => -post.data.date);
---
<table class={`postListing ${className}`}>
{
sortedPosts.map((post) => {
return (
<>
<tr class="row">
<td>
<div class="title">
<a href={`${basePath}/${post.slug}/`} class="brand-colorlink">
{post.data.title}
</a>
</div>
<div>
<Timestamp timestamp={post.data.date} format={timeFormat} />
&middot;
<TagList tags={post.data.tags} />
</div>
</td>
</tr>
</>
);
})
}
</table>
<style lang="scss">
.postListing {
width: 100%;
border-spacing: 6px 16px;
:global(.timestamp) {
font-family: var(--monofont);
color: var(--smaller-text-color);
font-size: 0.75em;
}
:global(.tags) {
gap: 4px;
display: inline-flex;
:global(.tag) {
background-color: inherit;
padding: 0;
}
}
td {
line-height: 1;
.title {
font-size: 12pt;
}
.summary {
padding-top: 4px;
font-size: 0.64em;
color: var(--smaller-text-color);
p {
display: inline;
}
}
}
td.info {
color: var(--smaller-text-color);
font-size: 0.75em;
white-space: nowrap;
text-align: right;
vertical-align: baseline;
.spacer {
font-size: 12pt;
}
}
}
</style>

View file

@ -1,18 +0,0 @@
---
import "../styles/tagList.scss";
const { extraFront, tags, extraBack } = Astro.props;
---
<div class="tags">
{extraFront}
{
tags.toSorted().map((tag: string) => (
<a href={`/tags/${tag}`} class="tag">
<i class="fa fa-tag" aria-hidden="true" />
<span class="text">#{tag}</span>
</a>
))
}
{extraBack}
</div>

View file

@ -1,13 +0,0 @@
---
interface Props {
timestamp: Date;
format?: string | undefined;
}
const { timestamp, format: customFormat } = Astro.props;
import { format } from "date-fns";
---
<span class="timestamp" title={timestamp.toISOString()}>
{format(timestamp, customFormat ?? "yyyy MMM dd")}
</span>

View file

@ -1,128 +0,0 @@
---
import type { MarkdownHeading } from "astro";
import "../styles/toc.scss";
interface Props {
toc: boolean;
headings: MarkdownHeading[];
}
const { toc, headings } = Astro.props;
const minDepth = Math.min(...headings.map((heading) => heading.depth));
---
{
toc ? (
<>
<div class="toc-wrapper">
<slot />
<div class="toc">
<h3 class="title">Table of contents</h3>
<ul>
{headings.map((heading) => {
const depth = heading.depth - minDepth;
const padding = 10 * Math.pow(0.85, depth);
const fontSize = 14 * Math.pow(0.9, depth);
return (
<li>
<a
href={`#${heading.slug}`}
id={`${heading.slug}-link`}
style={`padding: ${padding}px;`}
>
<span style={`padding-left: ${depth * 10}px; font-size: ${fontSize}px;`}>
{heading.text}
</span>
</a>
</li>
);
})}
</ul>
</div>
</div>
</>
) : (
<slot />
)
}
<script define:vars={{ toc, headings }}>
if (toc) {
const headingTags = new Set(["h1", "h2", "h3", "h4", "h5", "h6"]);
const headingsMap = new Map([...headings.map((heading) => [heading.slug, new Set()])]);
const reverseHeadingMap = new Map();
const linkMap = new Map();
document.addEventListener("DOMContentLoaded", function () {
const visibleElements = new Map();
// Register links
for (const heading of headings) {
const link = document.getElementById(`${heading.slug}-link`);
const el = document.getElementById(heading.slug);
if (link && el) {
linkMap.set(heading.slug, link);
link.addEventListener("click", (e) => {
el.scrollIntoView({ behavior: "smooth" });
e.preventDefault();
});
}
if (!visibleElements.has(heading.slug)) {
visibleElements.set(heading.slug, new Set());
}
}
const observer = new IntersectionObserver((entries) => {
for (const entry of entries) {
const target = entry.target;
const slug = reverseHeadingMap.get(target);
const link = linkMap.get(slug);
const associatedEls = visibleElements.get(slug);
if (entry.isIntersecting) {
// if it wasn't previously visible
// let's make the link active
if (associatedEls.size === 0) {
link.parentNode.classList.add("active");
}
associatedEls.add(target);
} else {
// if it was previously visible
// check if it's the last element
if (associatedEls.size > 0) {
if (associatedEls.size === 1) link.parentNode.classList.remove("active");
}
if (associatedEls.size > 0) {
associatedEls.delete(target);
}
ratioMap.delete(target);
}
}
});
const postContentEl = document.getElementById("post-content");
let belongsTo;
for (const child of postContentEl.children) {
if (headingTags.has(child.tagName.toLowerCase())) {
belongsTo = child.id;
}
if (belongsTo) {
const headingSet = headingsMap.get(belongsTo);
headingSet.add(child);
reverseHeadingMap.set(child, belongsTo);
}
}
[...headingsMap.values()]
.flatMap((x) => [...x])
.forEach((x) => {
observer.observe(x);
});
});
}
</script>

View file

@ -1,5 +0,0 @@
// Place any global data in this file.
// You can import this data from anywhere in your site by using the `import` keyword.
export const SITE_TITLE = "Astro Blog";
export const SITE_DESCRIPTION = "Welcome to my website!";

View file

@ -1,34 +0,0 @@
import { defineCollection, z } from "astro:content";
const posts = defineCollection({
type: "content",
// Type-check frontmatter using a schema
schema: ({ image }) =>
z.object({
title: z.string(),
date: z.date(),
// Transform string to Date object
pubDate: z
.string()
.or(z.date())
.transform((val) => new Date(val))
.optional(),
updatedDate: z
.string()
.optional()
.transform((str) => (str ? new Date(str) : undefined))
.optional(),
heroImage: image().optional(),
heroAlt: z.string().optional(),
tags: z.array(z.string()).default([]),
draft: z.boolean().default(false),
math: z.boolean().default(false),
toc: z.boolean().default(false),
}),
});
export const collections = { posts };

View file

@ -1,11 +0,0 @@
I'm a computer science master's student at the [University of Minnesota][1], advised by [Favonia].
My current research focus for my thesis is cubical type theory and its applications to formalizing mathematical proofs.
I also work as a researcher for [SIFT]. Previously I worked as a software engineer at [Swoop Search] and [AWS].
My computing-related interests lie in programming language design, type theory, formal verification, systems security, cryptography, and distributed systems.
[1]: https://twin-cities.umn.edu/
[Swoop Search]: https://swoopsrch.com/
[aws]: https://aws.amazon.com/
[sift]: https://www.sift.net/
[favonia]: https://favonia.org/

View file

@ -1,65 +0,0 @@
---
title: "Decentralized Identity, a Middle Ground"
date: 2022-10-30T03:04:51-05:00
logseq: true
tags:
- web
---
- With Twitter management changing, I'm starting to think about the small web again. The small web is a concept where you use the Internet to interact with people directly, rather than operating through the medium of megacorps. When we think about big monoliths like Twitter and Facebook, at the very heart of each of them is some kind of algorithm picking what it is we see. And the power to control what we see is what enables advertising and mass manipulation.
- For the rest of this blog post, I'll be talking about _microblogging_ specifically: short posts of several sentences, along with a few media attachments, of which Twitter is one of the largest monolithic implementations. But the things I talk about are also applicable to other applications, including long-form blogging (Medium), image sharing (Instagram, Snapchat), video sharing (YouTube), and chat (Messenger).
- There's several projects out there that are already trying out other extremes. I'll cover two big types here:
- **Federation.** This style of application development is inspired by email. The idea is that you develop a common _protocol_ for apps living on different servers to talk to each other, so you can have people choose their own servers rather than trusting everything to megacorps like Google or Facebook. One notable example of this in the microblogging realm is Mastodon / ActivityPub.
- **Peer-to-peer.** This style of application development is rarer, but can be seen commonly in things like torrents. The idea is that _each_ client holds all application info, and interacts with everything else as if they were all clients. One notable example of this in the microblogging realm is Scuttlebutt.
- I'll go into more detail about these two styles later, but they both come with their advantages and drawbacks. However, I don't think any of the solutions that exist are sustainable for longer periods of time.
- ## Feature Evaluation
- One thing I've been doing recently when I think about software applications and system design is break things down into their **features**. This makes it a lot easier to compare applications or services to each other than a vague statement like "I've had a good/bad experience with this". So let's come up with some features of a good microblogging platform:
- Application design
- **Verifiability:** It should be easy to verify if I did indeed ever say something.
- **Freedom:** Within reason, I should not be limited in what I say or do on the platform.
- **Universality:** I should be able to communicate to anyone from anywhere (any device, any location).
- System design
- **Availability:** I want to be confident that the system will be around for many years.
- **Connectability:** I should be able to connect with people I don't already know easily.
- **Moderation:** I should be able to choose to not interact with a certain subset of users, and this process should not be painful.
- Scaling
- **User scaling:** Supporting more users should incur a relatively linear or sublinear cost, and should certainly not lead to resource consumption blowup.
- **Temporal scaling:** The system should support being run for an arbitrarily long amount of time.
- Development
- **Malleability:** How easy is it to introduce / roll out new versions and features? This may be important for updating cryptographic algorithms.
- **Version drift:** Is it possible to support users who are on different version of the service?
- Some features are certainly more important than others, and may have different importance to different people. But it gives me a base line to evaluate services, and lets me decide which features I consider to be critical.
- These are obviously not the only features, but I like to list them in this form because while an
alternative app may tout certain features, it's useless if it falls short on something more important.
- ### Evaluation
- Now that we know what we're looking for, let's evaluate some existing services.
- **Mastodon**
- Satisfies:
- **Connectability:** It's easy to find other users based on their user ID, which looks like an email so it's easy to share.
- Doesn't satisfy:
- **Freedom:** While certain instances may be specialized (i.e hobby-focused), it's certainly not great when you are locked out of communicating with someone on another instance because the admins of your respective instances have beef with each other.
- **Availability:** For some reason or another, I've seen Mastodon admins give up on their instances. Running an instance is very costly in terms of money and time, so it's not surprising that if an instance owner decides it's simply no longer worth it, the users are simply hosed. While there are some options for migration like redirection, it's useless if the original server doesn't stay online.
- **Scuttlebutt**
- Satisfies:
- **Freedom:** This is the maximally free solution: your client is solely responsible for receiving and filtering all messages.
- Doesn't satisfy:
- **Connectability:** Connecting with someone must take place over an existing channel (i.e either meeting in meatspace or sharing a pub). If I don't have my Scuttlebutt client with me, then it's not possible to get updates.
- **Universality:** Since user identity is heavily tied to a single client, having multiple clients for a single user is not a well-supported workflow.
- The Matrix people have also developed [Cerulean], a playground for testing the idea of _relative reputation_, where instead of doing moderation of all users you can assign a web-of-trust-style reputation to people you follow. However, this may lead to further filter-bubbling, the implications of which I'm honestly not sure how to tackle.
[Cerulean]: https://matrix.org/blog/2020/12/18/introducing-cerulean#whats-with-the-decentralised-reputation-button
- ## Decentralized Identity
- My view for an ideal microblogging platform would be a federated server-client architecture, but where a user identity isn't tied to the server the same way a user is tied to their Mastodon instance. Essentially, you want your account to be promiscuous between several servers.
- This way, you gain all of the benefits of having a server-client architecture (easy to do multiple clients), while not relying on the server itself or its admins to stay around for a long time.
- The problem is, designing such a system securely is difficult. Ultimately, what I'm proposing is the following (a rough code sketch follows this list):
- A user identity model based on an asymmetric-key model. Users generate their own key locally and provide it to instances.
- Users should then have _client_ keys, which are independent of the user keys. These are signed by the user key to indicate that they belong to the same user.
- When a user wants to switch servers, they can begin requesting a checkout of their data from the server. The data can be signed by the server as well to provide an indicator that the client isn't trying to forge old history.
- One problem is if a server wants to deny a user from switching servers; they may try to keep the data hostage. One solution is just to scrape all publicly obtainable data, the other is to hope that this is socially discouraged by just having users avoid servers that engage in user-hostile actions.
- Users may also opt to replicate their identity entirely between two servers. This just means that this synchronization process is just happening incrementally or periodically rather than just at switch-time.
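- As a rough illustration of the key layer only (my sketch, not part of the original proposal; the names and the choice of Ed25519 here are assumptions), using Node's built-in crypto:

```ts
import { generateKeyPairSync, sign, verify } from "node:crypto";

// One long-lived identity key, held only by the user.
const userKeys = generateKeyPairSync("ed25519");

// A per-device client key, signed by the user key so that servers can tell
// this device acts on behalf of the same identity.
const clientKeys = generateKeyPairSync("ed25519");
const clientPub = clientKeys.publicKey.export({ type: "spki", format: "der" });
const delegation = sign(null, clientPub, userKeys.privateKey);

// Any server (or peer) can check the delegation against the user's public key.
const belongsToUser = verify(null, clientPub, userKeys.publicKey, delegation);
console.log("client key belongs to this identity:", belongsToUser);
```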
- This still has a number of open problems:
- **Key revocation.** Obviously, anything involving cryptographic keys requires a revocation protocol in the event that the keys are compromised. I'm not too sure of a good way of doing this that doesn't involve an infinitely growing append-only list of revocations that everyone who has ever interacted with the identity would need to track.
- When I have some free time, I may take a crack at a proof-of-concept implementation of a system like this.
- ### Further applications
- Since this scheme only really targets the identity layer with (theoretically) asymptotically constant change required for other parts of the software, this should be able to be extended to things other than just microblogging. I'm not sure about the level of success this may have for things like sharing large data files such as video (maybe mix in something like IPFS?), but there is certainly room for exploration.

View file

@ -1,181 +0,0 @@
---
title: "Equivalences"
slug: "equivalences"
date: 2023-05-06
tags:
- type-theory
- agda
- hott
math: true
draft: true
---
<details>
<summary>Imports</summary>
```
{-# OPTIONS --cubical #-}
open import Agda.Primitive.Cubical
open import Cubical.Foundations.Equiv
open import Cubical.Foundations.Prelude
open import Data.Bool
```
</details>
```
Bool-id : Bool → Bool
Bool-id true = true
Bool-id false = false
unap : {A B : Type} {x y : A} (f : A → B) → f x ≡ f y → x ≡ y
unap p i = ?
-- Need to convert point-wise equality into universally quantified equality?
Bool-id-refl : (x : Bool) → (Bool-id x ≡ x)
Bool-id-refl true = refl
Bool-id-refl false = refl
```
The equivalence proof below uses the contractibility-of-fibers definition of an
equivalence. There are other definitions, but this is the "default" one used by
the Cubical standard library.
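For reference (this is the standard definition restated here, not text from the
library itself), the fiber of $f$ over a point $y$ collects the preimages of $y$
together with the proofs that they map to $y$, and an equivalence is a map all
of whose fibers are contractible:

$$
\mathrm{fib}_f(y) :\equiv \Sigma_{(x : A)}\, (f\,x \equiv y)
\qquad
\mathrm{isEquiv}(f) :\equiv \Pi_{(y : B)}\, \mathrm{isContr}(\mathrm{fib}_f(y))
$$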
```
Bool-id-is-equiv : isEquiv Bool-id
```
In the contractibility-of-fibers proof, we must first establish our fibers. If
we had $(f : A \rightarrow B)$, then this says that given any $(y : B)$, we must
provide:
- an $(x : A)$ that would have gotten mapped to $y$ (preimage), and
- a proof that $f\ x \equiv y$
These are the two elements of the pair given below. Since our function is the
identity, we can just give $y$ again, and use the `Bool-id-refl` function above
for the equality proof.
```
```
The next step is to prove that it's contractible. Using the same derivation for
$y$ as above, this involves taking in another fiber $y_1$, and proving that it's
equivalent to the fiber we've just defined above.
To prove fiber equality, we can just do point-wise equality over both the
preimage of $y$, and then the second-level equality of the proof of $f\ x \equiv
y$.
In the first case here, we need to provide something that equals our $x$ above
when $i = i0$, and something that equals the fiber $y_1$'s preimage $x_1$ when
$i = i1$, aka $y \equiv proj_1\ y_1$.
```
-- 2023-05-13: Favonia's hint is to compute "ap g p", and then concatenate
-- it with a proof that g is the left-inverse of f
-- ok i'm pretty sure this should be the g = f^-1
Bool-id-inv : Bool → Bool
Bool-id-inv b = (((Bool-id-is-equiv .equiv-proof) b) .fst) .fst
Bool-id-inv-is-inv : (b : Bool) → Bool-id-inv (Bool-id b) ≡ b
Bool-id-inv-is-inv true =
Bool-id-inv (Bool-id true)
≡⟨ refl ⟩
Bool-id-inv true
≡⟨ refl ⟩
-- This isn't trivially true?
(Bool-id-is-equiv .equiv-proof true .fst) .fst
≡⟨ ? ⟩
true
Bool-id-inv-is-inv false = ?
Bool-id-is-equiv .equiv-proof y .fst = y , Bool-id-refl y
Bool-id-is-equiv .equiv-proof y .snd y₁ i .fst =
let
eqv = snd y₁
-- eqv : Bool-id (fst y₁) ≡ y
-- this is the second piece of the other fiber passed in
eqv2 = eqv ∙ sym (Bool-id-refl y)
-- eqv2 : Bool-id (fst y₁) ≡ Bool-id y
-- concat the fiber (Bool-id (fst y₁) ≡ y) with (y ≡ Bool-id y) to get the
-- path from (Bool-id (fst y₁) ≡ Bool-id y)
-- Ok, unap doesn't actually exist unless f is known to have an inverse.
-- Fortunately, because we're proving an equivalence, we know that f has an
-- inverse, in particular going from y to x, which in this case is also y.
eqv3 = unap Bool-id eqv2
-- Then, ap g p should be like:
ap-g-p : Bool-id-inv (Bool-id (fst y₁)) ≡ Bool-id-inv (Bool-id y)
ap-g-p = cong Bool-id-inv eqv2
-- OHHHHH now we just need to find that Bool-id-inv (Bool-id y) ≡ y, and
-- then we can apply it to both sides to simplify
-- So something like this:
-- left-id : Bool-id-inv ∙ Bool-id ≡ ?
-- left-id = ?
eqv3 = cong Bool-id-inv eqv2
give-me-info = ?
-- eqv3 : fst y₁ ≡ y
-- Use the equational reasoning shitter
final : y ≡ fst y₁
final =
y
≡⟨ ? ⟩
fst y₁
```
Blocked on this issue: https://git.mzhang.io/school/cubical/issues/1
```
eqv4 = ?
eqv4 = sym eqv3
-- eqv4 : y ≡ fst y₁
in
eqv4 i
```
Now we can prove that the path is the same
\begin{CD}
A @>>> B \\
@VVV @VVV \\
C @>>> D
\end{CD}
- $A \rightarrow B$ is the path of the original fiber that we've specified, which is $f\ x \equiv y$
- $C \rightarrow D$ is the path of the other fiber that we're proving, which is $proj_2\ y_1$
So what we want now is `a-b ≡ c-d`
```
Bool-id-is-equiv .equiv-proof y .snd y₁ i .snd j =
let
a-b = Bool-id-refl y
c-d = y₁ .snd
in
?
```
Blocked on this issue: https://git.mzhang.io/school/cubical/issues/2
```
```
## Other Equivalences
There are 2 other ways we can define equivalences:
TODO: Talk about them being equivalent to each other

View file

@ -1,227 +0,0 @@
---
title: "Learn by implementing Nginx's reverse proxy"
date: 2023-07-04
tags: ["web", "learn-by-implementing"]
draft: true
---
[Nginx] is a powerful tool but also comes with many knobs, which may make it
intimidating for lots of newcomers. In this post, let's rewrite its core
functionality using a few lines of code to understand what it's doing.
[nginx]: https://nginx.org/
<!--more-->
To begin, what's a [reverse-proxy]?
[reverse-proxy]: https://en.wikipedia.org/wiki/Reverse_proxy
- A proxy usually lets you access a site through some gateway when your client
is sitting behind some intercepting firewall
- A _reverse_ proxy lets others access a site through some gateway when the
server serving the site sits behind a firewall
As a middleman, it gets all requests and can introspect on the header and body
details. Which means it can:
- Serve multiple domains on the same server / port
- Wrap unencrypted services using HTTPS
- Perform load balancing
- Perform some basic routing
- Apply authentication
- Serve raw files without a server program
I'm going to implement this using [Deno].
[deno]: https://deno.land/
> **:bulb: This is a literate document.** I wrote a [small utility][3] to
> extract the code blocks out of markdown files, and it should produce a working
> example for this file. If you have the utility, then running the following
> should get you a copy of all the code extracted from this blog post:
>
> [3]: https://git.mzhang.io/michael/markout
>
> ```
> markout --lang ts path/to/posts/2023-07-04-learn-by-implementing-nginx.md > program.ts
> ```
>
> It can then be executed with Deno:
>
> ```
> deno run --allow-net program.ts
> ```
>
> <details>
> <summary>Imports</summary>
>
> ```ts
> import { serve } from "https://deno.land/std@0.192.0/http/mod.ts";
> const PORT = 8314;
> ```
>
> </details>
Deno implements an HTTP server for us. On a really high level, what this means
is it starts listening for TCP connections, and once it receives one, listens
for request headers and parses it. It then exposes methods for us to read the
headers and decide how to further receive the body. All we need to do is provide
it an async function to handle the request, and return a response. Something
like this:
```ts
// @ts-ignore
async function handlerImpl1(request: Request): Promise<Response> {
// code goes here...
}
// Later,
// serve(handlerImpl1, { port: PORT });
```
Now one of the primary utilities of something like Nginx is something called
**virtual hosts**. In networking, you would call a host the machine that runs
the server program. However, virtual hosting means the same machine can run
multiple server programs. This is possible because in the HTTP header
information, the client sends the domain it's trying to access.
```
Host: example.com
```
Using this info, the server can decide to route the request differently on a
per-domain basis. Something like SSH is not able to do this because nowhere
during the handshake process does the client ever request a particular domain.
You would have to wrap SSH with something else that's knowledgeable about that.
In our reverse-proxy example, we would want to redirect the request internally
to some different server, and then serve the response back to the client
transparently so it never realizes it went through a middleman!
So let's say we get some kind of config from the server admin, saying where to
send each request. It looks like this:
```ts
interface Config1 {
/** An object mapping a particular domain to a destination URL */
[domain: string]: string;
}
```
Let's wrap our function with another function, where we can take in the config
and make it accessible to the handler.
<details>
<summary>Why?</summary>
The `serve` here is what's called a **higher-order function**. This means that
rather than passing just data to it, we're passing it a function as a
_variable_ to store and call of its own volition. A common example of
a higher-order function is `Array.map`, where you take a function and apply it
to all elements within the array.
So since `serve` is calling our handler, we cannot change its signature.
That's because in order to change its signature, we have to change where it's
called, which is inside the Deno standard library.
Fortunately, functions capture variables (like `config`) from outside of their
scope, and when we pass it to `serve`, it retains those captured variables.
For an implementation like this, you don't actually need to wrap it in another
function like `mkHandler2`, but I'm doing it here to make it easier to
separate out the code into pieces that fit the prose of the blog post. You
could just as well just define it like this:
```
const config = { ... };
const handler = async function(request: Request): Promise<Response> {
// code goes here...
};
serve(handler, { port: PORT });
```
</details>
```ts
function mkHandler2(config: Config1) {
// @ts-ignore
return async function (request: Request): Promise<Response> {
// code goes here...
};
}
```
I'm going to write this in the most straightforward way possible, ignoring
`null` cases. Obviously in a real implementation, you would want to do error
checking and recovery (since the reverse proxy server must never crash, right?)
```ts
function mkHandler3(config: Config1) {
return async function (request: Request): Promise<Response> {
// Look for the host header
const hostHeader = request.headers.get("Host") as string;
// Look it up in our config
const proxyDestinationPrefix = config[hostHeader] as string;
// Let's fetch the destination and return it!
const requestUrl = new URL(request.url);
const fullUrl = proxyDestinationPrefix + requestUrl.pathname;
return await fetch(fullUrl);
};
}
```
Time to run it!
```ts
const config = {
"localhost:8314": "https://example.com",
"not-example.com": "https://text.npr.org",
};
const handler = mkHandler3(config);
serve(handler, { port: PORT });
```
First, try just making a request to `localhost:8314`. This is as easy as:
```bash
curl http://localhost:8314
```
This should load example.com, like we defined in our config. We did a simple
proxy, fetched the resource, and sent it back to the user. If there was a
resource that was not previously available to the public, but the reverse-proxy
could reach it, the public can now access it. Our reverse proxy feature is done.
Next, try making a request to not-example.com. However, we're going to use a
trick in curl so that it doesn't resolve the address on its own, but instead
connects to our local server while still sending the original Host header. This
trick is just for demonstration purposes, but it emulates a real-world need to
have multiple domains pointed at the same IP (for example, serving the redirect
from company.com to www.company.com on the same server).
```bash
curl --connect-to not-example.com:80:localhost:8314 http://not-example.com
```
This should produce the HTML for NPR's text-only page. This demonstrates that we
can serve different content depending on the site that's requested.
## Conclusion
This is a very bare-bones implementation, and lacks lots of detail. To begin
with, none of the errors are handled, so if a rogue request comes in it could
take down the server.
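As a rough illustration of the kind of fix involved (this wrapper is my sketch,
not part of the post's implementation), the handler could be wrapped so that any
thrown error turns into a 502 response instead of killing the request:

```ts
function mkSafeHandler(config: Config1) {
  const inner = mkHandler3(config);
  return async function (request: Request): Promise<Response> {
    try {
      return await inner(request);
    } catch (err) {
      // Log the failure, but keep the proxy itself alive.
      console.error("proxy error:", err);
      return new Response("Bad Gateway", { status: 502 });
    }
  };
}
```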
Those improvements would be necessary for a production-ready implementation, but
not interesting for a blog post. For a non-exhaustive list of bigger
improvements, consider:
- Can we have the web server remove a prefix from the requested url's path name
if we want to serve a website from a non-root path?
- Can we allow the reverse-proxy to reject requests directly by IP?
- Can we wrap non-HTTP content?
- What are some performance improvements we could make?
In a potential future blog post, I'll explore some of these topics.

Binary file not shown (before: 984 KiB).

View file

@ -1,144 +0,0 @@
---
title: Thoughts on personal organization
date: 2023-08-31T13:57:29.022Z
tags:
- organization
- logseq
heroImage: ./calendarHero.png
heroAlt: pastel colored stationery background with a bunch of calendars and personal organization tools in a crayon drawing style
---
Many people don't really use a calendar of any sort to manage their lives.
I get it. Putting events into a calendar is kind of a chore. It's a menial relic
from work and none of us want to even think about creating events during our
coveted personal hours. We want to live our lives free from the constraints of
the time boxes on our screens.
On top of that, traditional calendar apps still primarily use email
(sending invites, updating times, etc.), and the new generation of calendar
apps suffer from the social network problem of having to get everyone on the
same app.
But to some extent, it's still valuable to have things down in writing rather
than juggling it in our minds all the time.
Which is why it's such a shame that the personal management story has always
been kind of fragmented. Calendars are supposed to manage the entire picture of
my personal schedule, yet they only see a small slice of my life. The only
things a calendar can see automatically, with no intervention on my part, are
emails sent from airlines.
> [!admonition: NOTE]
> I'm sure Google or Apple could probably ritz up their services to scan text
> and guess events to put on your calendar, but that's missing the point. The vast
> majority of people I associate with rarely coordinate events over email in the
> first place.
## Journals
For a while I've always wanted a kind of personal information manager: something
that would put all my information in one place and make it easy for me to query
across apps. When I embarked on this search I wouldn't have thought that the
most promising tool would end up being a journaling app.
(by journaling app I mean something like [Logseq], [Obsidian], [Notion],
[Workflowy] or [the][roam] [million][joplin] [other][craft]
[similar][stdnotes] [apps][bear] that allow you to write some markdown-ish
content, store it, and then never look back at it again)
[logseq]: https://logseq.com
[obsidian]: https://obsidian.md/
[notion]: https://www.notion.so/
[workflowy]: https://workflowy.com/
[roam]: https://roamresearch.com/
[joplin]: https://joplinapp.org/
[craft]: https://www.craft.do/
[stdnotes]: https://standardnotes.com/
[bear]: https://bear.app/
The world of journaling apps is vast but relatively undiverse. Most of the apps
just have the same features others do, minus one or two gimmicks that make it a
ride or die. But there's one important feature that I have started looking out
for recently: the ability to attach arbitrary metadata to journal entries and be
able to query for them.
While new apps have been cropping up from time to time for a while now, I think
a common trend that's starting to emerge is that these "journals" are really
more like personal databases. Extracting structured fields is extremely
important if you want any kind of smart understanding of what is being
journaled.
For example, I could write "weighed in at 135 pounds today", but if I wanted to
find previous instances of this or make any kind of history, I would have to
essentially do a pure text search. However, with structured data this could be
different.
[Logseq], the app that I've settled on, is backed by a real database, and most
importantly exposes a lot of this functionality to you as a user. It allows you
to query directly on properties that you write into your daily journal or any
other page, for example like this:
![recording some property in logseq](./minicross.png)
What you're seeing is me using my daily journals to add a todo item for reading
a paper and tracking how long it takes me to do the [NY Times daily
crossword][minicross] (which I've shortened to minicross). I just add these to
my journal as it comes up throughout my day, but Logseq is able to index this
and serve it back to me in a very structured way:
[datascript]: https://github.com/tonsky/datascript
[minicross]: https://www.nytimes.com/crosswords/game/mini
![performing a query in logseq](./logseqQuery.png)
With this, I could go on to construct a graph and see historical data of how I
did over time. You can see how this could be used for more personal tracking
things like workout records or grocery trackers.
The query tool is very simple and easy to learn, and makes it easy to actually
_use_ the information you wrote down, instead of just burying it into oblivion.
For example, I can write todo items inline in my journal and find them all at a
time as well. Here's all of the todo items that I've tagged specifically with
the tag `#read`:
![reading list in logseq](./readingList.png)
Notice how the paper I added as a todo helpfully shows up here. No need for a
separate todo list or planning tool!
The fact that it truly is a database means I can just shove all kinds of
unrelated information into my journal, do some very trivial labeling and get
some really powerful uses out of it.
In the future I'd like to do dumps for my sleep and health data as well
and have Logseq be my ultimate source of truth. I've started developing a
[calendar plugin for Logseq][2] that will have the ability to display numerical
data using various visualizations for this purpose.
[2]: https://git.mzhang.io/michael/logseq-calendar
> [!admonition: NOTE]
> As an aside, this isn't sponsored in any way. While this post makes me sound
> like just a Logseq shill, it's actually quite the opposite: they're an
> open-source project solely funded by donations. I've been donating to them
> monthly on [Open Collective] and they've been actively developing really cool
> features!
[open collective]: https://opencollective.com/logseq
## Privacy
Because people are dumping so much of their lives into journals, it's absolutely
crucial that boundaries are clear. Without control, this would be a dream come
true for any data collection company: rather than having to go out and gather
the data, users are entering and structuring it all by themselves.
**End-to-end encryption** is a technique that ensures data is never able to be
accessed by your storage or synchronization providers. If you are in the market
for some kind of personal tracking app, make sure it talks about end-to-end
encryption as a feature. While it's [not the end-all-be-all of security][1],
it's certainly a big first step. Do careful research before deciding who to
trust with your data.
[1]: /posts/2021-10-31-e2e-encryption-useless-without-client-freedom

Binary file not shown (before: 62 KiB).

Binary file not shown (before: 25 KiB).

Binary file not shown (before: 190 KiB).

Binary file not shown (before: 630 KiB).

View file

@ -1,249 +0,0 @@
---
title: Building a formal CEK machine in Agda
draft: true
date: 2023-09-01T13:53:23.974Z
tags:
- computer-science
- programming-languages
- formal-verification
- lambda-calculus
heroImage: ./header.jpg
heroAlt: gears spinning wallpaper
math: true
toc: true
---
Back in 2022, I took a special topics course, CSCI 8980, on [reasoning about
programming languages using Agda][plfa], a dependently typed meta-language. For
the term project, we were to implement a simply-typed lambda calculus with
several extensions, along with proofs of certain properties.
[plfa]: https://plfa.github.io/
My lambda calculus implemented `call/cc` on top of a CEK machine.
<details>
<summary><b>Why is this interesting?</b></summary>
Reasoning about languages is one way of ensuring whole-program correctness.
Building up these languages from foundations grounded in logic helps us
achieve our goal with more rigor.
As an example, suppose I wrote a function that takes a list of numbers and
returns the maximum value. Mathematically speaking, this function would be
_non-total_; an input consisting of an empty list would not produce reasonable
output. If this were a library function I'd like to tell people who write code
that uses this function "don't give me an empty list!"
But just writing this in documentation isn't enough. What we'd really like is
for a tool (like a compiler) to tell any developer who is trying to pass an
empty list into our maximum function "You can't do that." Unfortunately, most
of the popular languages being used today have no way of describing "a list
that's not empty."
We still have a way to prevent people from running into this problem, though
it involves pushing the problem to runtime rather than compile time. The
maximum function could return an "optional" maximum. Some languages'
implementations of optional values force programmers to handle the "nothing"
case, while others ignore it silently. But in the more optimistic case, even
if the list was empty, the caller would have handled it and treated it
accordingly.
This isn't a pretty way to solve this problem. _Dependent types_ gives us
tools to solve this problem in an elegant way, by giving the type system the
ability to contain values. This also opens its own can of worms, but for
questions about program correctness, it is more valuable than depending on
catching problems at runtime.
</details>
## Crash course on the lambda calculus
The [lambda calculus] is a mathematical abstraction for computation. The core
mechanism it uses is a concept called a _term_. Everything that can be
represented in a lambda calculus is some combination of terms. A term can have
several constructors (a rough code sketch follows the list below):
[lambda calculus]: https://en.wikipedia.org/wiki/Lambda_calculus
- **Var.** This is just a variable, like $x$ or $y$. By itself it holds no
meaning, but during evaluation, the evaluation _environment_ holds a mapping
from variable names to the values. If the environment says $\{ x = 5 \}$, then
evaluating $x$ would result in $5$.
- **Abstraction, or lambda ($\lambda$).** An _abstraction_ is a term that describes some
other computation. From an algebraic perspective, it can be thought of as a
function with a single argument (i.e $f(x) = 2x$ is an abstraction, although
it would be written using the notation $\lambda x.2x$)
- **Application.** Application is sort of the opposite of abstraction, exposing
the computation that was abstracted away. From an algebraic perspective,
this is just function application (i.e applying $f(x) = 2x$ to $3$ would
result in $2 \times 3 = 6$. Note that only a simple substitution has been done
and further evaluation is required to reduce $2\times 3$)
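To make the shape of this concrete, here is a rough sketch (mine, not the post's
Agda formulation) of the three constructors as a data type in TypeScript:

```ts
// A term is exactly one of the three constructors described above.
type Term =
  | { kind: "var"; name: string }              // a variable, e.g. x
  | { kind: "lam"; param: string; body: Term } // an abstraction, e.g. λx. body
  | { kind: "app"; fn: Term; arg: Term };      // an application, e.g. f(x)

// (λx. x) applied to y
const example: Term = {
  kind: "app",
  fn: { kind: "lam", param: "x", body: { kind: "var", name: "x" } },
  arg: { kind: "var", name: "y" },
};
```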
### Why?
The reason it's set up this way is so we can reason about terms inductively.
Rather than having lots of syntax for making it easier for programmers to write
a for loop as opposed to a while loop, or constructing different kinds of
values, the lambda calculus focuses on function abstraction and calls, and
strips everything else away.
The idea is that because terms are just nested constructors, we can describe the
behavior of any term by just defining the behavior of these 3 constructors. The
flavorful features of other programming languages can be implemented on top of
the function call rules in ways that don't disrupt the basic function of the
evaluation.
In fact, the lambda calculus is [Turing-complete][tc], so any computation can
technically be reduced to those 3 constructs. I used numbers liberally in the
examples above, but in a lambda calculus without numbers, you could define
the natural numbers using a recursive strategy called [Church numerals]. It works like this:
[church numerals]: https://en.wikipedia.org/wiki/Church_encoding
- $z$ represents zero.
- $s$ represents a "successor", or increment function. So:
- $s(z)$ represents 1,
- $s(s(z))$ represents 2
- and so on.
In lambda calculus terms, this would look like:
| number | lambda calculus expression |
| ------ | ---------------------------------- |
| 0 | $\lambda s.(\lambda z.z)$ |
| 1 | $\lambda s.(\lambda z.s(z))$ |
| 2 | $\lambda s.(\lambda z.s(s(z)))$ |
| 3 | $\lambda s.(\lambda z.s(s(s(z))))$ |
In practice, many lambda calculus incorporate higher level constructors such as
numbers or lists to make it so we can avoid having to represent them using only
a series of function calls. However, any time we add more syntax to a language,
we increase its complexity in proofs, so for now let's keep it simple.
### The Turing completeness curse
As I noted above, the lambda calculus is [_Turing-complete_][tc]. One feature of
Turing complete systems is that they have a (provably!) unsolvable "halting"
problem. Most of the simple terms shown above terminate predictably. But as an
example of a term that doesn't halt, consider the _Y combinator_, an example of
a fixed-point combinator:
[tc]: https://en.wikipedia.org/wiki/Turing_completeness
$$
Y = \lambda f.(\lambda x.f(x(x)))(\lambda x.f(x(x)))
$$
That's quite a mouthful. If you try calling $Y$ on some term, you will find
that evaluation quickly expands without bound. That makes sense given its
purpose: to find a _fixed point_ of whatever function you pass in.
> [!admonition: NOTE]
> As an example, a fixed point of the function $f(x) = \sqrt{x}$ is $1$.
> That's because $f(1) = 1$, and repeatedly applying $f$ to any other positive
> number converges on this value. If you took any such number and applied $f$
> infinitely many times to it, you would get $1$.
>
> In this sense, the Y combinator can be seen as a sort of brute-force approach
> of finding this fixed point by simply applying the function over and over until
> the result stops changing. In the untyped lambda calculus, this can be used to
> implement simple (but possibly unbounded) recursion.
This actually proves disastrous for trying to reason about the logic of a
program. If something is able to recurse on itself without limit, we won't be
able to tell what its result is, and we _definitely_ won't be able to know if
the result is correct. This is why we typically ban unbounded recursion in
proof systems. In fact, you can give proofs for false statements using infinite
recursion.
This is why we actually prefer _not_ to work with Turing-complete languages when
doing logical reasoning on program evaluation. Instead, we always want to add
some constraints on it to make evaluation total, ensuring that we have perfect
information about our program's behavior.
### Simply-typed lambda calculus
The [simply-typed lambda calculus] (STLC, or the notational variant
$\lambda^\rightarrow$) adds types to every term. Types are crucial to any kind
of static program analysis. Suppose I was trying to apply the term $5$ to $6$ (in
other words, call $5$ with the argument $6$ as if $5$ was a function, like
$5(6)$). As humans we can look at that and instantly recognize that the
evaluation would be invalid, yet under the untyped lambda calculus, it would be
completely representable.
[simply-typed lambda calculus]: https://en.wikipedia.org/wiki/Simply_typed_lambda_calculus
To solve this in STLC, we would make this term completely unrepresentable at
all. To say you want to apply $5$ to $6$ would not be a legal STLC term. We do
this by requiring that all STLC terms are untyped lambda calculus terms
accompanied by a _type_.
This gives us more information about what's allowed before we run the
evaluation. For example, numbers may have their own type $\mathbb{N}$ (read
"nat", for "natural number") and booleans are $\mathrm{Bool}$, while functions
have a special "arrow" type $\_\rightarrow\_$, where the underscores represent
other types. A function that takes a number and returns a boolean (like isEven)
would have the type $\mathbb{N} \rightarrow \mathrm{Bool}$, while a function
that takes a boolean and returns another boolean would be $\mathrm{Bool}
\rightarrow \mathrm{Bool}$.
With this, we have a framework for rejecting terms that would otherwise be legal
in untyped lambda calculus, but would break when we tried to evaluate them. A
function application would be able to require that the argument is the same type
as what the function is expecting.
The nice property you get now is that all valid STLC programs will never get
_stuck_, which is being unable to evaluate due to some kind of error. Each term
will either be able to be evaluated to a next state, or is done.
A semi-formal definition for STLC terms would look something like this (a code sketch follows the list):
- **Var.** Same as before, it's a variable that can be looked up in the
environment.
- **Abstraction, or lambda ($\lambda$).** This is a function that carries three pieces
of information:
1. the name of the variable that its input will be substituted for
2. the _type_ of the input, and
3. the body in which the substitution will happen.
- **Application.** Same as before.
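Continuing the earlier sketch (again my illustration, not the post's code), the
only change is that an abstraction now carries the type of its parameter:

```ts
// Simple types: naturals, booleans, and function ("arrow") types.
type Type =
  | { kind: "nat" }
  | { kind: "bool" }
  | { kind: "arrow"; from: Type; to: Type };

type TypedTerm =
  | { kind: "var"; name: string }
  | { kind: "lam"; param: string; paramType: Type; body: TypedTerm }
  | { kind: "app"; fn: TypedTerm; arg: TypedTerm };
```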
It doesn't really seem like changing just one term changes the language all that
much. But as a result of this tiny change, _every_ term now has a type:
- $5 :: \mathbb{N}$
- $λ(x:\mathbb{N}).2x :: \mathbb{N} \rightarrow \mathbb{N}$
- $isEven(3) :: (\mathbb{N} \rightarrow \mathrm{Bool}) · \mathbb{N} = \mathrm{Bool}$
> [!admonition: NOTE]
> Some notation:
>
> - $x :: T$ means $x$ has type $T$, and
> - $f · x$ means $f$ applied to $x$
This also means that some values are now unrepresentable:
- $isEven(λx.2x)$ wouldn't work anymore because the type of the inner argument
$λx.2x$ would be $\mathbb{N} \rightarrow \mathbb{N}$, which can't be used as an input
for $isEven$, which is expecting a $\mathbb{N}$.
We have a good foundation for writing programs now, but this by itself can't
qualify as a system for computation. We need an abstract machine of sorts that
can evaluate these symbols and actually compute on them.
In practice, there's a number of different possible abstract machines that can
evaluate the lambda calculus. Besides the basic direct implementation, alternate
implementations such as [interaction nets] have become popular due to being able
to be parallelized efficiently.
[interaction nets]: https://en.wikipedia.org/wiki/Interaction_nets
## CEK machine
A CEK machine is responsible for evaluating a lambda calculus term.
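The name comes from the three pieces of state the machine tracks: the
**c**ontrol (the term currently being evaluated), the **e**nvironment (bindings
for the free variables), and the **k**ontinuation (what to do with the result).
As a rough sketch in the same style as before (not the post's Agda version):

```ts
// Values are closures: an abstraction packaged with the environment it closed over.
type Env = Map<string, Value>;
type Value = { kind: "closure"; param: string; body: Term; env: Env };

// The continuation records the work remaining around the current term.
type Kont =
  | { kind: "done" }
  | { kind: "evalArg"; arg: Term; env: Env; next: Kont } // function done, argument next
  | { kind: "apply"; fn: Value; next: Kont };            // argument done, apply next

interface State {
  control: Term; // C: the term currently being evaluated
  env: Env;      // E: bindings for its free variables
  kont: Kont;    // K: what to do with the result
}
```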

View file

@ -1,12 +0,0 @@
---
title: Hindley-Milner type inference
date: 2023-09-01T13:06:55.614Z
tags:
- type-theory
draft: true
---
Today, a lot of languages have a feature called something along the lines of
"type inference". The idea is that through the way variables are passed and
used, you can make guesses about what type each one should be, and preemptively point
out invalid or incompatible uses. Let's talk about how it works.
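As a small illustration of the idea (using TypeScript, whose inference is more
local than Hindley-Milner; the variable names are made up):

```ts
// No annotations are written, yet every type below is known to the compiler:
const doubled = [1, 2, 3].map((x) => x * 2);     // x: number, doubled: number[]
const labels = doubled.map((n) => `value=${n}`); // labels: string[]

// doubled.push("oops"); // rejected: string is not assignable to number
```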

Some files were not shown because too many files have changed in this diff