From ef6cb8cb53bc0f39be30c92e4b764072fd30de4b Mon Sep 17 00:00:00 2001
From: Adam Chlipala <adamc@csail.mit.edu>
Date: Mon, 20 Feb 2017 15:24:51 -0500
Subject: [PATCH] Comment DataAbstraction

---
 DataAbstraction.v | 203 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 199 insertions(+), 4 deletions(-)

diff --git a/DataAbstraction.v b/DataAbstraction.v
index db3f8a0..7c528ba 100644
--- a/DataAbstraction.v
+++ b/DataAbstraction.v
@@ -8,30 +8,80 @@ Require Import Frap.
 Set Implicit Arguments.
 
 
+(* Perhaps the essence of effective programming is division of large tasks into
+ * smaller ones, and *data abstraction* is a key technique to that end.
+ * We provide a clear separation between *interfaces* and *implementations*.
+ * The author of a library can take responsibility for making it implement an
+ * interface faithfully, *encapsulating* private state and other implementation
+ * details in a way that client code can't observe.  Then that client code can
+ * mix and match implementations of some well-specified functionality.
+ *
+ * As part of our quick tour through effective Coq programming, we will dwell on
+ * patterns for data abstraction, including how to state it formally, from the
+ * perspectives of both libraries and client code. *)
+
+
+(** * Specification styles for data abstraction *)
+
+(* One of the classic formalisms for data abstraction is the *algebraic* style,
+ * where requirements on implementations are written out as quantified
+ * equalities.  Any implementation must satisfy these equalities, but we grant
+ * implementations freedom in internal details. *)
 Module Algebraic.
+  (* Here's an example of an algebraic interface or *specification* ("spec" for
+   * short).  It's for purely function queues, which follow first-in-first-out
+   * disciplines. *)
   Module Type QUEUE.
     Parameter t : Set -> Set.
+    (* An implementation must include some data type [t].
+     * Actually, it's more of a *type family*, e.g. like [list] and some other
+     * polymorphic container types we looked at last time. *)
 
     Parameter empty : forall A, t A.
+    (* For any type [A] of data, we can build a queue for that element type. *)
     Parameter enqueue : forall A, t A -> A -> t A.
+    (* Enqueue a new element, in the functional style, where we build a new
+     * queue instead of modifying the original. *)
     Parameter dequeue : forall A, t A -> option (t A * A).
+    (* Given a queue, either return [None] if it is empty or [Some (q', v)] for
+     * the result of dequeuing one element, where [q'] is the new queue (now
+     * one element shorter) and [v] is the element we dequeue. *)
 
+    (* Which algebraic properties characterize correct queues? *)
+
+    (* First, [dequeue] returns [None] exactly on empty queues. *)
     Axiom dequeue_empty : forall A,
         dequeue (empty A) = None.
     Axiom empty_dequeue : forall A (q : t A),
         dequeue q = None -> q = empty A.
+
+    (* Second, [dequeue] forms a kind of inverse for [enqueue]. *)
     Axiom dequeue_enqueue : forall A (q : t A) x,
         dequeue (enqueue q x) = Some (match dequeue q with
                                       | None => (empty A, x)
                                       | Some (q', y) => (enqueue q' x, y)
                                       end).
+
+    (* These properties turn out to be enough to prove interesting properties
+     * about client code that uses queues.  Before we get there, we should
+     * define some concrete queue implementations.  (If we don't give an
+     * implementation, we often realize that the spec is *unrealizable*!) *)
   End QUEUE.
 
+  (* First, there is a fairly straightforward implementation with lists. *)
   Module ListQueue : QUEUE.
     Definition t : Set -> Set := list.
+    (* Note that we use identifier [list] alone as a first-class type family,
+     * without specifying a parameter explicitly. *)
 
+    (* We follow the convention of enqueuing onto the front of lists and
+     * dequeuing from the back, so the first two operations are just the first
+     * two constructors of [list]. *)
     Definition empty A : t A := nil.
     Definition enqueue A (q : t A) (x : A) : t A := x :: q.
+
+    (* [dequeue] is a little more work: we use recursion to step down to the
+     * last element of a list. *)
     Fixpoint dequeue A (q : t A) : option (t A * A) :=
       match q with
       | [] => None
@@ -42,6 +92,9 @@ Module Algebraic.
         end
       end.
 
+    (* Applying our experience so far with Coq proofs, the algebraic laws are
+     * unremarkable to establish. *)
+
     Theorem dequeue_empty : forall A, dequeue (empty A) = None.
     Proof.
       simplify.
@@ -80,17 +133,33 @@ Module Algebraic.
     Qed.
   End ListQueue.
 
+  (* There are software-engineering benefits to interface-implementation
+   * separation even when one only bothers to build a single implementation.
+   * However, often there are naive and clever optimized versions of a single
+   * interface.  Queues are no exception.  Before we get to a truly clever
+   * version, we'll demonstrate with a less obviously better version:
+   * enqueuing at the back and dequeuing from the front. *)
   Module ReversedListQueue : QUEUE.
     Definition t : Set -> Set := list.
+    (* Still the same internal queue type, but note that Coq's type system
+     * prevents client code from knowing that fact!  [t] appears *opaque*
+     * or *abstract* from the outside, as we'll see shortly. *)
 
+    (* The first two operations are similar, but now we enqueue at the
+     * list end. *)
     Definition empty A : t A := [].
     Definition enqueue A (q : t A) (x : A) : t A := q ++ [x].
+
+    (* [dequeue] is now constant time, with no recursion and just a single
+     * pattern match. *)
     Definition dequeue A (q : t A) : option (t A * A) :=
       match q with
       | [] => None
       | x :: q' => Some (q', x)
       end.
 
+    (* The proofs of the laws are still boring. *)
+
     Theorem dequeue_empty : forall A, dequeue (empty A) = None.
     Proof.
       simplify.
@@ -124,13 +193,27 @@ Module Algebraic.
     Qed.
   End ReversedListQueue.
 
+  (* Let's take a look at some client code that is agnostic to queue
+   * implementation details.  We have been using Coq's *module system*, inspired
+   * by those of the ML programming languages, to encode interfaces and
+   * implementations.  Coq also adopts from ML the idea of *functors*, or
+   * functions from modules to modules. *)
   Module DelayedSum (Q : QUEUE).
+    (* The code in this scope may refer to some unknown implementation [Q] of
+     * the [QUEUE] interface. *)
+
+    (* We will only use a simple example here: enqueue the first [n] natural
+     * numbers and then successively dequeue them, computing the sum as we
+     * go. *)
+
+    (* First, the function to enqueue the first [n] natural numbers. *)
     Fixpoint makeQueue (n : nat) (q : Q.t nat) : Q.t nat :=
       match n with
       | 0 => q
       | S n' => makeQueue n' (Q.enqueue q n')
       end.
 
+    (* Next, the function to dequeue repeatedly, keeping a sum. *)
     Fixpoint computeSum (n : nat) (q : Q.t nat) : nat :=
       match n with
       | 0 => 0
@@ -140,17 +223,25 @@ Module Algebraic.
                 end
       end.
 
+    (* This function gives the expected answer, in a simpler form, of
+     * [computeSum] after [makeQueue]. *)
     Fixpoint sumUpto (n : nat) : nat :=
       match n with
       | 0 => 0
       | S n' => n' + sumUpto n'
       end.
 
+    (* A crucial lemma: what results from dequeuing out of a [makeQueue]
+     * call?  The answer depends on whether the initial queue [q] has anything
+     * to dequeue. *)
     Lemma dequeue_makeQueue : forall n q,
         Q.dequeue (makeQueue n q)
         = match Q.dequeue q with
-          | Some (q', v) => Some (makeQueue n q', v)
+          | Some (q', v) =>
+            (* The queue we started with had content.  We dequeue from it. *)
+            Some (makeQueue n q', v)
           | None =>
+            (* No content in initial queue.  We get [n-1] (unless [n = 0]). *)
             match n with
             | 0 => None
             | S n' => Some (makeQueue n' q, n')
@@ -168,15 +259,18 @@ Module Algebraic.
       simplify.
       rewrite IHn.
       rewrite Q.dequeue_enqueue.
+      (* ^-- Crucial step!  First use of a law from the interface. *)
       cases (Q.dequeue q).
       cases p.
       equality.
 
       rewrite (Q.empty_dequeue (q := q)).
+      (* ^-- Another law! *)
       equality.
       assumption.
     Qed.
 
+    (* Now we can tackle the final property directly by induction. *)
     Theorem computeSum_ok : forall n,
         computeSum n (makeQueue n (Q.empty nat)) = sumUpto n.
     Proof.
@@ -195,26 +289,45 @@ Module Algebraic.
   End DelayedSum.
 End Algebraic.
 
+(* There is a famous implementation of queues with two stacks, achieving
+ * amortized constant time for all operations, in contrast to the worst-case
+ * quadratic time of both queue implementations we just saw.  However, to
+ * justify this fancy implementation, we will need to choose a more permissive
+ * interface, based on the idea of parameterizing over an arbitrary *equivalence
+ * relation* between queues, which need not be simple equality. *)
 Module AlgebraicWithEquivalenceRelation.
   Module Type QUEUE.
+    (* We still have a type family of queues, plus the same three operations. *)
     Parameter t : Set -> Set.
 
+
     Parameter empty : forall A, t A.
     Parameter enqueue : forall A, t A -> A -> t A.
     Parameter dequeue : forall A, t A -> option (t A * A).
 
+    (* What's new?  This equivalence relation.  The type [Prop] stands for
+     * logical truth values, so a function returning it can be seen as a
+     * relation in the usual mathematical sense.  This is a *binary* relation,
+     * in particular, since it takes two arguments. *)
     Parameter equiv : forall A, t A -> t A -> Prop.
 
+    (* Let's declare convenient syntax for the relation. *)
     Infix "~=" := equiv (at level 70).
 
+    (* It really is an equivalence relation, as formalized by the usual three
+     * laws. *)
     Axiom equiv_refl : forall A (a : t A), a ~= a.
     Axiom equiv_sym : forall A (a b : t A), a ~= b -> b ~= a.
     Axiom equiv_trans : forall A (a b c : t A), a ~= b -> b ~= c -> a ~= c.
 
+    (* It must be the case that enqueuing elements preserves the relation. *)
     Axiom equiv_enqueue : forall A (a b : t A) (x : A),
         a ~= b
         -> enqueue a x ~= enqueue b x.
 
+    (* We define a derived relation for results of [dequeue]: either both
+     * [dequeue]s failed to return anything, or both returned the same data
+     * value along with new queues that are themselves related. *)
     Definition dequeue_equiv A (a b : option (t A * A)) :=
       match a, b with
       | None, None => True
@@ -228,6 +341,9 @@ Module AlgebraicWithEquivalenceRelation.
         a ~= b
         -> dequeue a ~~= dequeue b.
 
+    (* We retain the three axioms from the prior interface, using our fancy
+     * relation instead of equality on queues. *)
+
     Axiom dequeue_empty : forall A,
         dequeue (empty A) = None.
     Axiom empty_dequeue : forall A (q : t A),
@@ -241,6 +357,8 @@ Module AlgebraicWithEquivalenceRelation.
             end.
   End QUEUE.
 
+  (* It's easy to redo [ListQueue], specifying normal equality for the
+   * equivalence relation. *)
   Module ListQueue : QUEUE.
     Definition t : Set -> Set := list.
 
@@ -347,11 +465,27 @@ Module AlgebraicWithEquivalenceRelation.
     Qed.
   End ListQueue.
 
+  (* However, now we can implement the classic two-stacks optimized queue! *)
   Module TwoStacksQueue : QUEUE.
+    (* Every queue is a pair of stacks: one for enqueuing and one for
+     * dequeuing. *)
     Record stackpair (A : Set) := {
       EnqueueHere : list A;
+      (* This stack has more recently enqueued elements closer to the front,
+       * making enqueuing constant-time. *)
+
       DequeueHere : list A
+      (* This stack has least recently enqueued elements closer to the front,
+       * making dequeuing constant-time. *)
     }.
+    (* What's the catch?  Sometimes we need to reverse [EnqueueHere] and
+     * transfer it to [DequeueHere], or otherwise there would never be anything
+     * to dequeue!  Luckily, the work we do in transfering is bounded
+     * asymptotically by the total number of enqueue/dequeue operations, so
+     * we get *amortized* constant time. *)
+
+    (* By the way, the [Record] feature we used above is similar to e.g. structs
+     * in C. *)
 
     Definition t := stackpair.
 
@@ -368,6 +502,8 @@ Module AlgebraicWithEquivalenceRelation.
       | x :: dq => Some ({| EnqueueHere := q.(EnqueueHere);
                             DequeueHere := dq |}, x)
       | [] =>
+        (* Out of dequeuable elements.  Reverse enqueued elements
+         * and transfer to the other stack. *)
         match rev q.(EnqueueHere) with
         | [] => None
         | x :: eq => Some ({| EnqueueHere := [];
@@ -375,9 +511,12 @@ Module AlgebraicWithEquivalenceRelation.
         end
       end.
 
+    (* This function explains which simple queue representation we have in mind,
+     * for each fancy two-stack representation. *)
     Definition elements A (q : t A) : list A :=
       q.(EnqueueHere) ++ rev q.(DequeueHere).
 
+    (* That function is useful to define our equivalence relation. *)
     Definition equiv A (a b : t A) :=
       elements a = elements b.
     Infix "~=" := equiv (at level 70).
@@ -397,6 +536,9 @@ Module AlgebraicWithEquivalenceRelation.
       equality.
     Qed.
 
+    (* Now it is mostly routine to prove the laws, though a few tricks may
+     * be worth reading through. *)
+
     Theorem equiv_enqueue : forall A (a b : t A) (x : A),
         a ~= b
         -> enqueue a x ~= enqueue b x.
@@ -544,6 +686,9 @@ Module AlgebraicWithEquivalenceRelation.
     Qed.
   End TwoStacksQueue.
 
+  (* The exercise of the generic delayed sum may be repeated with equivalence
+   * relations. *)
+
   Module DelayedSum (Q : QUEUE).
     Fixpoint makeQueue (n : nat) (q : Q.t nat) : Q.t nat :=
       match n with
@@ -732,6 +877,12 @@ Module AlgebraicWithEquivalenceRelation.
   End DelayedSum.
 End AlgebraicWithEquivalenceRelation.
 
+(* It's worth presenting one final style of data-abstraction formalism: we
+ * introduce *representation functions* in the interface, to map the internal
+ * representation to some standard one that is easy to reason about.  We don't
+ * expect "real code" to call the representation function.  Instead, it's just a
+ * proof device to let us write convincing laws.  Here's the previous example
+ * redone in this manner, without comment. *)
 Module RepFunction.
   Module Type QUEUE.
     Parameter t : Set -> Set.
@@ -984,9 +1135,14 @@ Module RepFunction.
 End RepFunction.
 
 
+(** * Data abstraction with fixed parameter types *)
+
+(* Finite sets are another classic *abstract data type*, another name for what
+ * we have been defining so far with modules.  Here's a generic finite-set
+ * interface, following the first algebraic style we saw above. *)
 Module Type FINITE_SET.
-  Parameter key : Set.
-  Parameter t : Set.
+  Parameter key : Set. (* What type of data may be added to these sets? *)
+  Parameter t : Set.   (* What is the type of sets themselves? *)
 
   Parameter empty : t.
   Parameter add : t -> key -> t.
@@ -1001,8 +1157,17 @@ Module Type FINITE_SET.
       -> member (add s k1) k2 = member s k2.
 
   Axiom decidable_equality : forall a b : key, a = b \/ a <> b.
+  (* This last axiom may be a bit surprising.  Coq is so oriented toward
+   * computation that we don't assume the *law of the excluded middle*, which
+   * says that every proposition is either true or false.  Instead, we prove
+   * specific instances as needed.  But feel free to ignore this point for
+   * the purposes of this class. *)
 End FINITE_SET.
 
+(* We want a generic implementation of finite sets, as found in the standard
+ * libaries of languages like Java.  However, not just any key set will do.
+ * We need enough computable operations.  One sufficient operation is an
+ * equality test. *)
 Module Type SET_WITH_EQUALITY.
   Parameter t : Set.
   Parameter equal : t -> t -> bool.
@@ -1010,6 +1175,11 @@ Module Type SET_WITH_EQUALITY.
   Axiom equal_ok : forall a b, if equal a b then a = b else a <> b.
 End SET_WITH_EQUALITY.
 
+(* Here's a generic implementation of finite sets, parameterized over an
+ * arbitrary set with a correct equality operation.  Note the use of the [with]
+ * operator to *refine* the result signature [FINITE_SET]: we reveal that the
+ * [key] type is actually [SE.T], that is the key type from the parameter module
+ * [SE]. *)
 Module ListSet(SE : SET_WITH_EQUALITY) <: FINITE_SET with Definition key := SE.t.
   Definition key := SE.t.
   Definition t := list SE.t.
@@ -1057,6 +1227,7 @@ Module ListSet(SE : SET_WITH_EQUALITY) <: FINITE_SET with Definition key := SE.t
   Qed.
 End ListSet.
 
+(* Here's an example decidable-equality type for natural numbers. *)
 Module NatWithEquality <: SET_WITH_EQUALITY with Definition t := nat.
   Definition t := nat.
 
@@ -1084,8 +1255,10 @@ Module NatWithEquality <: SET_WITH_EQUALITY with Definition t := nat.
   Qed.
 End NatWithEquality.
 
+(* And here's how to instantiate the generic set for the naturals. *)
 Module NatSet := ListSet(NatWithEquality).
 
+(* Now, some generic client code, for testing duplicate-freeness of lists. *)
 Module FindDuplicates (FS : FINITE_SET).
   Fixpoint noDuplicates' (ls : list FS.key) (s : FS.t) : bool :=
     match ls with
@@ -1095,9 +1268,13 @@ Module FindDuplicates (FS : FINITE_SET).
 
   Definition noDuplicates (ls : list FS.key) := noDuplicates' ls FS.empty.
 
+  (* A characterization of having a duplicate: the list can be partitioned into
+   * pieces revealing the same element [a] at two boundaries. *)
   Definition hasDuplicate (ls : list FS.key) :=
     exists ls1 a ls2 ls3, ls = ls1 ++ a :: ls2 ++ a :: ls3.
 
+  (* A characterization of containing an element [a]: the list can be
+   * partitioned into two pieces, with [a] at the boundary. *)
   Definition contains (a : FS.key) (ls : list FS.key) :=
     exists ls1 ls2, ls = ls1 ++ a :: ls2.
 
@@ -1227,16 +1404,31 @@ Compute NatDuplicateFinder.noDuplicates [1; 2].
 Compute NatDuplicateFinder.noDuplicates [1; 2; 3].
 Compute NatDuplicateFinder.noDuplicates [1; 2; 1; 3].
 
+
+(** * Custom implementations of abstract data types *)
+
+(* Sometimes we want to write custom implementations of polymorphic data types.
+ * Our last example of duplicate detection is a good one: we can make it much
+ * faster when we know something about how the lists will be built.  In
+ * particular, finite sets of natural numbers can be made compact when we know
+ * that the common case is *intervals*, sets of consecutive numbers. *)
 Module NatRangeSet <: FINITE_SET with Definition key := nat.
   Definition key := nat.
 
   Inductive rangeSet : Set :=
   | Empty
+    (* Set has no elements. *)
   | Range (from to : nat)
-  | AdHoc (s : NatSet.t).
+    (* Set contains exactly the numbers from [from] to [to], inclusive. *)
+  | AdHoc (s : NatSet.t)
+    (* Set isn't an interval, so fall back on the list-based version. *).
 
   Definition t := rangeSet.
 
+  (* When we realize that a freshly formed set isn't an interval, we often need
+   * to convert an interval to an ad-hoc set.  These functions accomplish
+   * that. *)
+
   Fixpoint fromRange' (from to : nat) : NatSet.t :=
     match to with
     | 0 => NatSet.add NatSet.empty 0
@@ -1544,6 +1736,9 @@ Module NatRangeSet <: FINITE_SET with Definition key := nat.
   Qed.
 End NatRangeSet.
 
+(* Time for a head-to-head performance contest between our naive and clever
+ * sets! *)
+
 Module FasterNatDuplicateFinder := FindDuplicates(NatRangeSet).
 
 Fixpoint upto (n : nat) : list nat :=