From 6e82102c1b6e8cf60c990626d5bbf2484d7e243b Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Tue, 17 Dec 2024 16:38:38 -0600 Subject: [PATCH] crdt post --- .vscode/settings.json | 3 + src/content/posts/2024-12-16-crdt.lagda.md | 294 +++++++++++++++++++++ src/styles/agda.scss | 4 +- 3 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/content/posts/2024-12-16-crdt.lagda.md diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9e3a076 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "npm.packageManager": "pnpm" +} diff --git a/src/content/posts/2024-12-16-crdt.lagda.md b/src/content/posts/2024-12-16-crdt.lagda.md new file mode 100644 index 0000000..5f85af3 --- /dev/null +++ b/src/content/posts/2024-12-16-crdt.lagda.md @@ -0,0 +1,294 @@ +--- +title: CRDTs in Agda +slug: 2024-12-16-crdt +date: 2024-12-16 +tags: ["agda", "formalization"] +--- + +Today during my flight back from Boston, I did a small formalization of [conflict-free replicated data types, or CRDTs][crdt] (thanks Sun Country for not having wifi...). + +CRDTs are prevalent in the data synchronization world, and enable each peer to track their own changes, then merge those changes without conflict. +One of the nice guarantees of this system (and also a requirement of the data structures!) is that changes can be applied in any order, and they must eventually become consistent. +I'll present a way of modeling this using Agda, along with two simple example formalizations of data structures. + +[crdt]: https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type + +> [!admonition: NOTE] +> As usual, this is a [Literate agda][lagda] file. +> This means it is real code that type-checks. +> Click on any element to navigate to its definition! + +[lagda]: https://agda.readthedocs.io/en/latest/tools/literate-programming.html + +
+ Imports + +``` +module 2024-12-16-crdt where + +open import Agda.Primitive +open import Data.List using (List; _∷_; []) +open import Data.Nat using (ℕ; suc) +open import Data.Product +open import Data.Bool using (true; false) +open import Data.Unit using () +open import Data.Empty +open import Function +open import Relation.Binary.Bundles +open import Relation.Binary.Definitions +open import Relation.Binary.Consequences +open import Relation.Binary.Structures +import Relation.Binary.PropositionalEquality as Eq +open import Relation.Binary.PropositionalEquality hiding (isEquivalence; isDecEquivalence) +open Eq.≡-Reasoning +open import Relation.Nullary.Decidable +open import Relation.Nullary.Negation +open import Relation.Nullary.Reflects +``` + +
+ +## Definition + +For this post, I'm doing an operation-based CRDT. +Since the C in CRDT stands for conflict-free: + +- The `apply` function is commutative. + We should be free to merge changes in any order. + This also guarantees eventual consistency (as long as all changes are replicated properly), since no matter what the changes are going to be, as long as they can commute arbitrarily, we can just re-order them. +- The `apply` function is total. + We can't have any errors being thrown. + All states must merge cleanly. + (NOTE: This is a protocol-level concept. We can still have conflicts in applications, they would just have to appear as a valid state in the CRDT.) + +Let's define our data structure: + +
+ Some boilerplate for abstracting over time + +``` +-- ignore the levels, they are there so `Time` can be abstracted over +module _ {ℓ cℓ timeℓ timeℓ₁ timeℓ₂ : Level} (TimeOrder : StrictTotalOrder timeℓ timeℓ₁ timeℓ₂) where + open StrictTotalOrder (TimeOrder) + using (compare; _<_; _≈_; _>_; asym; irrefl; isDecEquivalence) + renaming (Carrier to Time) + open IsDecEquivalence (isDecEquivalence) using () renaming (sym to symEqv) + + TCompare : Time → Time → Set (timeℓ₁ ⊔ timeℓ₂) + TCompare t1 t2 = Tri (t1 < t2) (t1 ≈ t2) (t1 > t2) +``` + +
+ +``` + -- This data structure represents a proof that the type A is a CRDT + record isCRDT {cℓ ℓ : Level} (A : Set cℓ) : Set (timeℓ ⊔ cℓ ⊔ lsuc ℓ) where + field + -- Some type representing all possible operations + op : Set ℓ + + -- The no-op. Having this lets me only require apply2 + noOp : op + + -- Apply 2 operations to a state, along with their timestamps + -- Note that there is no error state for this. + -- All operations must be cleanly applied. + apply2 : (o1 o2 : op) (t1 t2 : Time) → A → A + + -- The property that applying changes must be commutative. + comm : (a : A) → (o1 o2 : op) (t1 t2 : Time) → apply2 o1 o2 t1 t2 a ≡ apply2 o2 o1 t2 t1 a +``` + +I think in reality it would be the job of `op` to store the time, rather than the entire `isCRDT`, but this is the way I wrote it :sob: + +Another note is, I required `apply2` rather than just `apply` taking a single operation, because I have to express the fact that the data structure can choose to reorder my operations however it wants. + +## Example: counter + +Here is a simple example of a counter. The counter's only operation is _increment_, and the merging operation is trivial. + +``` + module _ where + open isCRDT + + -- A counter is represented by just a natural number + Counter = ℕ + + private + data Op : Set where + nop : Op + inc : Op + + CounterIsCRDT : isCRDT Counter + CounterIsCRDT .op = Op + CounterIsCRDT .noOp = nop +``` + +> [!admonition: NOTE] +> Because the rest of the code all lies inside this module, future code blocks are going to start with more indentation :( + +Now we need to define apply and commutativity: + +``` + CounterIsCRDT .apply2 = apply2' where + apply2' : Op → Op → Time → Time → Counter → Counter + apply2' nop nop t1 t2 c = c -- no change + apply2' nop inc t1 t2 c = suc c -- just add one + apply2' inc nop t1 t2 c = suc c -- just add one + apply2' inc inc t1 t2 c = suc (suc c) -- two inc's adds two + + CounterIsCRDT .comm = comm' where + apply2' = CounterIsCRDT .apply2 + + -- All cases are trivial, since the operations are symmetric by definition + comm' : (a : Counter) (o1 o2 : Op) (t1 t2 : Time) + → apply2' o1 o2 t1 t2 a ≡ apply2' o2 o1 t2 t1 a + comm' c nop nop t1 t2 = refl + comm' c nop inc t1 t2 = refl + comm' c inc nop t1 t2 = refl + comm' c inc inc t1 t2 = refl +``` + +## Example: last-write-win map + +Here's a slightly more complicated example: last-write-win maps. +Instead of using a built-in map module, I'm going to define some properties that a map should have. +Importantly, I'll require that there be some way of breaking ties between equal keys. + +
+ Boilerplate for setting up maps + +``` + record isMap (M K V : Set cℓ) : Set (cℓ ⊔ timeℓ) where + constructor mkIsMap + field + mget : M → K → V + mext : (m1 m2 : M) → ((k : K) → mget m1 k ≡ mget m2 k) → m1 ≡ m2 + minsert : (k : K) (v : V) (m : M) → Σ M (λ m' → mget m' k ≡ v) + + -- These functions are used to arbitrarily order two choices in case the time _and_ keys collide + -- It must be deterministic + mpick : (kvt1 kvt2 : K × V × Time) → (K × V × Time) × (K × V × Time) + mpickcomm : (kvt1 kvt2 : K × V × Time) → mpick kvt1 kvt2 ≡ mpick kvt2 kvt1 + + MapType : (K V : Set cℓ) → Set (lsuc cℓ ⊔ timeℓ) + MapType K V = Σ (Set cℓ) (λ M → isMap M K V) + + module _ (K V : Set cℓ) (Map : MapType K V) where + open isCRDT + + M = Map .proj₁ + open isMap (Map .proj₂) +``` + +
+ +It's time to define operations and apply. + +``` + private + data Op : Set cℓ where + nop : Op + insert : K → V → Op + + -- "deterministic pair" is how it's used, but this is really just two (K, V) pairs + record DetPair (K V : Set cℓ) : Set cℓ where + field + k₁ k₂ : K + v₁ v₂ : V + + module _ (k1 : K) (v1 : V) (t1 : Time) (k2 : K) (v2 : V) (t2 : Time) where + -- This helper takes the time comparison result and decides how to order the + -- resulting pair + mkDetPairHelper : TCompare t1 t2 → DetPair K V + mkDetPairHelper (tri< a ¬b ¬c) = + record { k₁ = k1 ; k₂ = k2 ; v₁ = v1 ; v₂ = v2 } + mkDetPairHelper (tri≈ ¬a b ¬c) = + let arb = mpick (k1 , v1 , t1) (k2 , v2 , t2) in + record + { k₁ = arb .proj₁ .proj₁ + ; v₁ = arb .proj₁ .proj₂ .proj₁ + ; k₂ = arb .proj₂ .proj₁ + ; v₂ = arb .proj₂ .proj₂ .proj₁ + } + mkDetPairHelper (tri> ¬a ¬b c) = + record { k₁ = k2 ; k₂ = k1 ; v₁ = v2 ; v₂ = v1 } + + mkDetPair : DetPair K V + mkDetPair = mkDetPairHelper (compare t1 t2) + + -- Define single apply + apply : Op → M → M + apply nop m = m + apply (insert k v) m = minsert k v m .proj₁ + + apply2' : Op → Op → Time → Time → M → M + apply2' nop nop t1 t2 m = m + apply2' o1 nop t1 t2 m = apply o1 m -- single apply is used + apply2' nop o2 t1 t2 m = apply o2 m -- in the NOP cases + apply2' (insert k1 v1) (insert k2 v2) t1 t2 m = + let p = mkDetPair k1 v1 t1 k2 v2 t2 in + apply (insert (p .k₂) (p .v₂)) (apply (insert (p .k₁) (p .v₁)) m) + where open DetPair +``` + +Commutativity is now just a matter of going back and verifying that those cases hold. +For the case where I `mpick`ed an arbitrary order, I used the required `mpickcomm` property to show that it commutes. + +``` + comm' : (m : M) (o1 o2 : Op) (t1 t2 : Time) → apply2' o1 o2 t1 t2 m ≡ apply2' o2 o1 t2 t1 m + comm' m nop nop t1 t2 = refl + comm' m (insert _ _) nop t1 t2 = refl + comm' m nop (insert _ _) t1 t2 = refl + comm' m (insert k1 v1) (insert k2 v2) t1 t2 = cong f (detPairLemma k1 k2 v1 v2 t1 t2) where + f : (dp : DetPair K V) → M + f dp = apply (insert (dp .k₂) (dp .v₂)) (apply (insert (dp .k₁) (dp .v₁)) m) + where open DetPair + + module _ (k1 k2 : K) (v1 v2 : V) (t1 t2 : Time) where + detPairLemmaHelper : (tc1 : TCompare t1 t2) (tc2 : TCompare t2 t1) → mkDetPairHelper k1 v1 t1 k2 v2 t2 tc1 ≡ mkDetPairHelper k2 v2 t2 k1 v1 t1 tc2 + -- Uninteresting cases. + -- Let me know if you know of a way to avoid having to list these cases! + detPairLemmaHelper (tri< a ¬b ¬c) (tri< a₁ ¬b₁ ¬c₁) = ⊥-elim (asym a a₁) + detPairLemmaHelper (tri< a ¬b ¬c) (tri≈ ¬a b ¬c₁) = ⊥-elim (irrefl (symEqv b) a) + detPairLemmaHelper (tri≈ ¬a b ¬c) (tri< a ¬b ¬c₁) = ⊥-elim (irrefl (symEqv b) a) + detPairLemmaHelper (tri≈ ¬a b ¬c) (tri> ¬a₁ ¬b c) = ⊥-elim (irrefl b c) + detPairLemmaHelper (tri> ¬a ¬b c) (tri≈ ¬a₁ b ¬c) = ⊥-elim (irrefl b c) + detPairLemmaHelper (tri> ¬a ¬b c) (tri> ¬a₁ ¬b₁ c₁) = ⊥-elim (asym c c₁) + -- Interesting cases. + detPairLemmaHelper (tri< a ¬b ¬c) (tri> ¬a ¬b₁ c) = refl + detPairLemmaHelper (tri≈ ¬a b ¬c) (tri≈ ¬a₁ b₁ ¬c₁) = cong pair pickComm where + kvt1 = k1 , v1 , t1 + kvt2 = k2 , v2 , t2 + pair : (K × V × Time) × (K × V × Time) → DetPair K V + pair ((k1 , v1 , t1) , (k2 , v2 , t2)) = record { k₁ = k1 ; k₂ = k2 ; v₁ = v1 ; v₂ = v2 } + pickComm : mpick kvt1 kvt2 ≡ mpick kvt2 kvt1 + pickComm = mpickcomm kvt1 kvt2 + detPairLemmaHelper (tri> ¬a ¬b c) (tri< a ¬b₁ ¬c) = refl + + detPairLemma : mkDetPair k1 v1 t1 k2 v2 t2 ≡ mkDetPair k2 v2 t2 k1 v1 t1 + detPairLemma = detPairLemmaHelper (compare t1 t2) (compare t2 t1) +``` + +Finally, to wrap up: + +``` + LWWMapIsCRDT : isCRDT M + LWWMapIsCRDT .op = Op + LWWMapIsCRDT .noOp = nop + LWWMapIsCRDT .apply2 = apply2' + LWWMapIsCRDT .comm = comm' +``` + +## Conclusion + +If I were to use this in a real product, I could write `{-# COMPILE ... #-}` and give +translations of all the constructs into Javascript, and then import this as a library. + +Anyway, this is just a toy implementation. +Let me know how this could be improved! + +As an aside, if you're interested in theorem proving (Lean/Agda), make sure to check out [Advent of Proof 2024][AOP]! +There have been plenty of cool problems so far. + +[aop]: https://typesig.pl/lean/agda/events/2024/12/12/advent-of-proof-2024.html diff --git a/src/styles/agda.scss b/src/styles/agda.scss index ee29950..e1d9bae 100644 --- a/src/styles/agda.scss +++ b/src/styles/agda.scss @@ -4,7 +4,7 @@ --blue: #5175be; --module: purple; --inductive-constructor: #008b00; - --comment: #b22222; + --comment: #a7a7a7; --string: var(--comment); } @@ -15,7 +15,7 @@ --blue: #9999FF; --module: hsl(276, 66%, 69%); --inductive-constructor: #32bb32; - --comment: hsl(0, 78%, 66%); + --comment: hsl(0, 0%, 47%); } }