Comment DataAbstraction

This commit is contained in:
Adam Chlipala 2017-02-20 15:24:51 -05:00
parent 69f6acb514
commit ef6cb8cb53

View file

@ -8,30 +8,80 @@ Require Import Frap.
Set Implicit Arguments.
(* Perhaps the essence of effective programming is division of large tasks into
* smaller ones, and *data abstraction* is a key technique to that end.
* We provide a clear separation between *interfaces* and *implementations*.
* The author of a library can take responsibility for making it implement an
* interface faithfully, *encapsulating* private state and other implementation
* details in a way that client code can't observe. Then that client code can
* mix and match implementations of some well-specified functionality.
*
* As part of our quick tour through effective Coq programming, we will dwell on
* patterns for data abstraction, including how to state it formally, from the
* perspectives of both libraries and client code. *)
(** * Specification styles for data abstraction *)
(* One of the classic formalisms for data abstraction is the *algebraic* style,
* where requirements on implementations are written out as quantified
* equalities. Any implementation must satisfy these equalities, but we grant
* implementations freedom in internal details. *)
Module Algebraic.
(* Here's an example of an algebraic interface or *specification* ("spec" for
* short). It's for purely functional queues, which follow first-in-first-out
* disciplines. *)
Module Type QUEUE.
Parameter t : Set -> Set.
(* An implementation must include some data type [t].
* Actually, it's more of a *type family*, e.g. like [list] and some other
* polymorphic container types we looked at last time. *)
Parameter empty : forall A, t A.
(* For any type [A] of data, we can build a queue for that element type. *)
Parameter enqueue : forall A, t A -> A -> t A.
(* Enqueue a new element, in the functional style, where we build a new
* queue instead of modifying the original. *)
Parameter dequeue : forall A, t A -> option (t A * A).
(* Given a queue, either return [None] if it is empty or [Some (q', v)] for
* the result of dequeuing one element, where [q'] is the new queue (now
* one element shorter) and [v] is the element we dequeue. *)
(* Which algebraic properties characterize correct queues? *)
(* First, [dequeue] returns [None] exactly on empty queues. *)
Axiom dequeue_empty : forall A,
dequeue (empty A) = None.
Axiom empty_dequeue : forall A (q : t A),
dequeue q = None -> q = empty A.
(* Second, [dequeue] forms a kind of inverse for [enqueue]. *)
(* Law: dequeuing right after enqueuing [x] onto [q] always succeeds, and the
 * result is determined by what [dequeue q] would have returned. *)
Axiom dequeue_enqueue : forall A (q : t A) x,
dequeue (enqueue q x) = Some (match dequeue q with
| None => (empty A, x)
(* [q] had nothing to dequeue, so [x] itself comes out, paired with
 * the empty queue. *)
| Some (q', y) => (enqueue q' x, y)
(* [q] would have yielded [y] and [q']: we still get [y], and [x] is
 * enqueued onto [q'] instead. *)
end).
(* These properties turn out to be enough to prove interesting properties
* about client code that uses queues. Before we get there, we should
* define some concrete queue implementations. (If we don't give an
* implementation, we often realize that the spec is *unrealizable*!) *)
End QUEUE.
(* First, there is a fairly straightforward implementation with lists. *)
Module ListQueue : QUEUE.
Definition t : Set -> Set := list.
(* Note that we use identifier [list] alone as a first-class type family,
* without specifying a parameter explicitly. *)
(* We follow the convention of enqueuing onto the front of lists and
* dequeuing from the back, so the first two operations are just the first
* two constructors of [list]. *)
Definition empty A : t A := nil.
Definition enqueue A (q : t A) (x : A) : t A := x :: q.
(* [dequeue] is a little more work: we use recursion to step down to the
* last element of a list. *)
Fixpoint dequeue A (q : t A) : option (t A * A) :=
match q with
| [] => None
@ -42,6 +92,9 @@ Module Algebraic.
end
end.
(* Applying our experience so far with Coq proofs, the algebraic laws are
* unremarkable to establish. *)
Theorem dequeue_empty : forall A, dequeue (empty A) = None.
Proof.
simplify.
@ -80,17 +133,33 @@ Module Algebraic.
Qed.
End ListQueue.
(* There are software-engineering benefits to interface-implementation
* separation even when one only bothers to build a single implementation.
* However, often there are naive and clever optimized versions of a single
* interface. Queues are no exception. Before we get to a truly clever
* version, we'll demonstrate with a less obviously better version:
* enqueuing at the back and dequeuing from the front. *)
Module ReversedListQueue : QUEUE.
Definition t : Set -> Set := list.
(* Still the same internal queue type, but note that Coq's type system
* prevents client code from knowing that fact! [t] appears *opaque*
* or *abstract* from the outside, as we'll see shortly. *)
(* The first two operations are similar, but now we enqueue at the
* list end. *)
Definition empty A : t A := [].
Definition enqueue A (q : t A) (x : A) : t A := q ++ [x].
(* [dequeue] is now constant time, with no recursion and just a single
* pattern match. *)
Definition dequeue A (q : t A) : option (t A * A) :=
match q with
| [] => None
(* No elements at all, so there is nothing to dequeue. *)
| x :: q' => Some (q', x)
(* [enqueue] appends at the end of the list, so the head [x] is the
 * element that was enqueued earliest; the tail [q'] is what remains. *)
end.
(* The proofs of the laws are still boring. *)
Theorem dequeue_empty : forall A, dequeue (empty A) = None.
Proof.
simplify.
@ -124,13 +193,27 @@ Module Algebraic.
Qed.
End ReversedListQueue.
(* Let's take a look at some client code that is agnostic to queue
* implementation details. We have been using Coq's *module system*, inspired
* by those of the ML programming languages, to encode interfaces and
* implementations. Coq also adopts from ML the idea of *functors*, or
* functions from modules to modules. *)
Module DelayedSum (Q : QUEUE).
(* The code in this scope may refer to some unknown implementation [Q] of
* the [QUEUE] interface. *)
(* We will only use a simple example here: enqueue the first [n] natural
* numbers and then successively dequeue them, computing the sum as we
* go. *)
(* First, the function to enqueue the first [n] natural numbers. *)
(* [makeQueue n q] extends [q] by enqueuing [n-1], then [n-2], and so on down
 * to [0]; with [n = 0], it returns [q] unchanged. *)
Fixpoint makeQueue (n : nat) (q : Q.t nat) : Q.t nat :=
match n with
| 0 => q
| S n' => makeQueue n' (Q.enqueue q n')
(* Enqueue the predecessor [n'] first, and only then recurse, so larger
 * numbers enter the queue before smaller ones. *)
end.
(* Next, the function to dequeue repeatedly, keeping a sum. *)
Fixpoint computeSum (n : nat) (q : Q.t nat) : nat :=
match n with
| 0 => 0
@ -140,17 +223,25 @@ Module Algebraic.
end
end.
(* This function gives the expected answer, in a simpler form, of
* [computeSum] after [makeQueue]. *)
(* [sumUpto n] adds up the naturals strictly below [n], i.e.
 * [0 + 1 + ... + (n-1)], which are exactly the values that [makeQueue n]
 * enqueues. *)
Fixpoint sumUpto (n : nat) : nat :=
match n with
| 0 => 0
| S n' => n' + sumUpto n'
end.
(* A crucial lemma: what results from dequeuing out of a [makeQueue]
* call? The answer depends on whether the initial queue [q] has anything
* to dequeue. *)
Lemma dequeue_makeQueue : forall n q,
Q.dequeue (makeQueue n q)
= match Q.dequeue q with
| Some (q', v) => Some (makeQueue n q', v)
| Some (q', v) =>
(* The queue we started with had content. We dequeue from it. *)
Some (makeQueue n q', v)
| None =>
(* No content in initial queue. We get [n-1] (unless [n = 0]). *)
match n with
| 0 => None
| S n' => Some (makeQueue n' q, n')
@ -168,15 +259,18 @@ Module Algebraic.
simplify.
rewrite IHn.
rewrite Q.dequeue_enqueue.
(* ^-- Crucial step! First use of a law from the interface. *)
cases (Q.dequeue q).
cases p.
equality.
rewrite (Q.empty_dequeue (q := q)).
(* ^-- Another law! *)
equality.
assumption.
Qed.
(* Now we can tackle the final property directly by induction. *)
Theorem computeSum_ok : forall n,
computeSum n (makeQueue n (Q.empty nat)) = sumUpto n.
Proof.
@ -195,26 +289,45 @@ Module Algebraic.
End DelayedSum.
End Algebraic.
(* There is a famous implementation of queues with two stacks, achieving
* amortized constant time for all operations, in contrast to the worst-case
* quadratic time of both queue implementations we just saw. However, to
* justify this fancy implementation, we will need to choose a more permissive
* interface, based on the idea of parameterizing over an arbitrary *equivalence
* relation* between queues, which need not be simple equality. *)
Module AlgebraicWithEquivalenceRelation.
Module Type QUEUE.
(* We still have a type family of queues, plus the same three operations. *)
Parameter t : Set -> Set.
Parameter empty : forall A, t A.
Parameter enqueue : forall A, t A -> A -> t A.
Parameter dequeue : forall A, t A -> option (t A * A).
(* What's new? This equivalence relation. The type [Prop] stands for
* logical truth values, so a function returning it can be seen as a
* relation in the usual mathematical sense. This is a *binary* relation,
* in particular, since it takes two arguments. *)
Parameter equiv : forall A, t A -> t A -> Prop.
(* Let's declare convenient syntax for the relation. *)
Infix "~=" := equiv (at level 70).
(* It really is an equivalence relation, as formalized by the usual three
* laws. *)
Axiom equiv_refl : forall A (a : t A), a ~= a.
Axiom equiv_sym : forall A (a b : t A), a ~= b -> b ~= a.
Axiom equiv_trans : forall A (a b c : t A), a ~= b -> b ~= c -> a ~= c.
(* It must be the case that enqueuing elements preserves the relation. *)
Axiom equiv_enqueue : forall A (a b : t A) (x : A),
a ~= b
-> enqueue a x ~= enqueue b x.
(* We define a derived relation for results of [dequeue]: either both
* [dequeue]s failed to return anything, or both returned the same data
* value along with new queues that are themselves related. *)
Definition dequeue_equiv A (a b : option (t A * A)) :=
match a, b with
| None, None => True
@ -228,6 +341,9 @@ Module AlgebraicWithEquivalenceRelation.
a ~= b
-> dequeue a ~~= dequeue b.
(* We retain the three axioms from the prior interface, using our fancy
* relation instead of equality on queues. *)
Axiom dequeue_empty : forall A,
dequeue (empty A) = None.
Axiom empty_dequeue : forall A (q : t A),
@ -241,6 +357,8 @@ Module AlgebraicWithEquivalenceRelation.
end.
End QUEUE.
(* It's easy to redo [ListQueue], specifying normal equality for the
* equivalence relation. *)
Module ListQueue : QUEUE.
Definition t : Set -> Set := list.
@ -347,11 +465,27 @@ Module AlgebraicWithEquivalenceRelation.
Qed.
End ListQueue.
(* However, now we can implement the classic two-stacks optimized queue! *)
Module TwoStacksQueue : QUEUE.
(* Every queue is a pair of stacks: one for enqueuing and one for
* dequeuing. *)
Record stackpair (A : Set) := {
EnqueueHere : list A;
(* This stack has more recently enqueued elements closer to the front,
* making enqueuing constant-time. *)
DequeueHere : list A
(* This stack has least recently enqueued elements closer to the front,
* making dequeuing constant-time. *)
}.
(* What's the catch? Sometimes we need to reverse [EnqueueHere] and
* transfer it to [DequeueHere], or otherwise there would never be anything
* to dequeue! Luckily, the work we do in transferring is bounded
* asymptotically by the total number of enqueue/dequeue operations, so
* we get *amortized* constant time. *)
(* By the way, the [Record] feature we used above is similar to e.g. structs
* in C. *)
Definition t := stackpair.
@ -368,6 +502,8 @@ Module AlgebraicWithEquivalenceRelation.
| x :: dq => Some ({| EnqueueHere := q.(EnqueueHere);
DequeueHere := dq |}, x)
| [] =>
(* Out of dequeuable elements. Reverse enqueued elements
* and transfer to the other stack. *)
match rev q.(EnqueueHere) with
| [] => None
| x :: eq => Some ({| EnqueueHere := [];
@ -375,9 +511,12 @@ Module AlgebraicWithEquivalenceRelation.
end
end.
(* This function explains which simple queue representation we have in mind,
* for each fancy two-stack representation. *)
Definition elements A (q : t A) : list A :=
(* [EnqueueHere] already runs from most to least recently enqueued, while
 * [DequeueHere] runs the other way, so we reverse the latter and put it last.
 * The combined list is ordered from most to least recently enqueued. *)
q.(EnqueueHere) ++ rev q.(DequeueHere).
(* That function is useful to define our equivalence relation. *)
Definition equiv A (a b : t A) :=
elements a = elements b.
Infix "~=" := equiv (at level 70).
@ -397,6 +536,9 @@ Module AlgebraicWithEquivalenceRelation.
equality.
Qed.
(* Now it is mostly routine to prove the laws, though a few tricks may
* be worth reading through. *)
Theorem equiv_enqueue : forall A (a b : t A) (x : A),
a ~= b
-> enqueue a x ~= enqueue b x.
@ -544,6 +686,9 @@ Module AlgebraicWithEquivalenceRelation.
Qed.
End TwoStacksQueue.
(* The exercise of the generic delayed sum may be repeated with equivalence
* relations. *)
Module DelayedSum (Q : QUEUE).
Fixpoint makeQueue (n : nat) (q : Q.t nat) : Q.t nat :=
match n with
@ -732,6 +877,12 @@ Module AlgebraicWithEquivalenceRelation.
End DelayedSum.
End AlgebraicWithEquivalenceRelation.
(* It's worth presenting one final style of data-abstraction formalism: we
* introduce *representation functions* in the interface, to map the internal
* representation to some standard one that is easy to reason about. We don't
* expect "real code" to call the representation function. Instead, it's just a
* proof device to let us write convincing laws. Here's the previous example
* redone in this manner, without comment. *)
Module RepFunction.
Module Type QUEUE.
Parameter t : Set -> Set.
@ -984,9 +1135,14 @@ Module RepFunction.
End RepFunction.
(** * Data abstraction with fixed parameter types *)
(* Finite sets are another classic *abstract data type*, another name for what
* we have been defining so far with modules. Here's a generic finite-set
* interface, following the first algebraic style we saw above. *)
Module Type FINITE_SET.
Parameter key : Set.
Parameter t : Set.
Parameter key : Set. (* What type of data may be added to these sets? *)
Parameter t : Set. (* What is the type of sets themselves? *)
Parameter empty : t.
Parameter add : t -> key -> t.
@ -1001,8 +1157,17 @@ Module Type FINITE_SET.
-> member (add s k1) k2 = member s k2.
Axiom decidable_equality : forall a b : key, a = b \/ a <> b.
(* This last axiom may be a bit surprising. Coq is so oriented toward
* computation that we don't assume the *law of the excluded middle*, which
* says that every proposition is either true or false. Instead, we prove
* specific instances as needed. But feel free to ignore this point for
* the purposes of this class. *)
End FINITE_SET.
(* We want a generic implementation of finite sets, as found in the standard
* libraries of languages like Java. However, not just any key set will do.
* We need enough computable operations. One sufficient operation is an
* equality test. *)
Module Type SET_WITH_EQUALITY.
Parameter t : Set.
Parameter equal : t -> t -> bool.
@ -1010,6 +1175,11 @@ Module Type SET_WITH_EQUALITY.
Axiom equal_ok : forall a b, if equal a b then a = b else a <> b.
End SET_WITH_EQUALITY.
(* Here's a generic implementation of finite sets, parameterized over an
* arbitrary set with a correct equality operation. Note the use of the [with]
* operator to *refine* the result signature [FINITE_SET]: we reveal that the
* [key] type is actually [SE.t], that is the key type from the parameter module
* [SE]. *)
Module ListSet(SE : SET_WITH_EQUALITY) <: FINITE_SET with Definition key := SE.t.
Definition key := SE.t.
Definition t := list SE.t.
@ -1057,6 +1227,7 @@ Module ListSet(SE : SET_WITH_EQUALITY) <: FINITE_SET with Definition key := SE.t
Qed.
End ListSet.
(* Here's an example decidable-equality type for natural numbers. *)
Module NatWithEquality <: SET_WITH_EQUALITY with Definition t := nat.
Definition t := nat.
@ -1084,8 +1255,10 @@ Module NatWithEquality <: SET_WITH_EQUALITY with Definition t := nat.
Qed.
End NatWithEquality.
(* And here's how to instantiate the generic set for the naturals. *)
Module NatSet := ListSet(NatWithEquality).
(* Now, some generic client code, for testing duplicate-freeness of lists. *)
Module FindDuplicates (FS : FINITE_SET).
Fixpoint noDuplicates' (ls : list FS.key) (s : FS.t) : bool :=
match ls with
@ -1095,9 +1268,13 @@ Module FindDuplicates (FS : FINITE_SET).
Definition noDuplicates (ls : list FS.key) := noDuplicates' ls FS.empty.
(* A characterization of having a duplicate: the list can be partitioned into
* pieces revealing the same element [a] at two boundaries. *)
Definition hasDuplicate (ls : list FS.key) :=
exists ls1 a ls2 ls3, ls = ls1 ++ a :: ls2 ++ a :: ls3.
(* A characterization of containing an element [a]: the list can be
* partitioned into two pieces, with [a] at the boundary. *)
Definition contains (a : FS.key) (ls : list FS.key) :=
exists ls1 ls2, ls = ls1 ++ a :: ls2.
@ -1227,16 +1404,31 @@ Compute NatDuplicateFinder.noDuplicates [1; 2].
Compute NatDuplicateFinder.noDuplicates [1; 2; 3].
Compute NatDuplicateFinder.noDuplicates [1; 2; 1; 3].
(** * Custom implementations of abstract data types *)
(* Sometimes we want to write custom implementations of polymorphic data types.
* Our last example of duplicate detection is a good one: we can make it much
* faster when we know something about how the lists will be built. In
* particular, finite sets of natural numbers can be made compact when we know
* that the common case is *intervals*, sets of consecutive numbers. *)
Module NatRangeSet <: FINITE_SET with Definition key := nat.
Definition key := nat.
Inductive rangeSet : Set :=
| Empty
(* Set has no elements. *)
| Range (from to : nat)
| AdHoc (s : NatSet.t).
(* Set contains exactly the numbers from [from] to [to], inclusive. *)
| AdHoc (s : NatSet.t)
(* Set isn't an interval, so fall back on the list-based version. *).
Definition t := rangeSet.
(* When we realize that a freshly formed set isn't an interval, we often need
* to convert an interval to an ad-hoc set. These functions accomplish
* that. *)
Fixpoint fromRange' (from to : nat) : NatSet.t :=
match to with
| 0 => NatSet.add NatSet.empty 0
@ -1544,6 +1736,9 @@ Module NatRangeSet <: FINITE_SET with Definition key := nat.
Qed.
End NatRangeSet.
(* Time for a head-to-head performance contest between our naive and clever
* sets! *)
Module FasterNatDuplicateFinder := FindDuplicates(NatRangeSet).
Fixpoint upto (n : nat) : list nat :=