(** Formal Reasoning About Programs * Supplementary Coq material: polymorphism and generic data structures * Author: Adam Chlipala * License: https://creativecommons.org/licenses/by-nc-nd/4.0/ *) Require Import Frap. Set Implicit Arguments. (* This command sets up automatic inference of tedious arguments. *) (* One of the recurring ingredients in effective machine formalization is data * structures, and, as in general programming, we want to be able to define data * structures that are *generic* in the kinds of data they contain. Coq * supports such things in roughly the same way as Haskell and OCaml, via * *parametric polymorphism*, which is closely related to the idea of *generics* * in languages like Java. *) (* Our first example: the [option] type family. While Java and friends force * all sorts of different types to include the special value [null], in Coq we * request that option explicitly by wrapping a type in [option]. Specifically, * any value of type [option A], for some type [A], is either [None] (sort of * like [null]) or [Some v] for a [v] of type [A]. *) Inductive option (A : Set) : Set := | None | Some (v : A). Arguments None {A}. (* This command asks Coq to *infer* the [A] type for each specific use of * [None]. *) (* Here are a few example terms using [option]. *) Example no_number : option nat := None. Example a_number : option nat := Some 42. Example no_number_squared : option (option nat) := None. Example no_number_squared_inside : option (option nat) := Some None. Example a_number_squared : option (option nat) := Some (Some 42). (* Pattern matching is the key ingredient for working with inductive definitions * of all sorts. Here are some examples matching on [option]s. *) Definition increment_optional (no : option nat) : option nat := match no with | None => None | Some n => Some (n + 1) end. (* Here we use type [A * B] of *pairs*, inhabited by values [(a, b)], with * [a : A] and [b : B]. *) Definition add_optional (po : option (nat * nat)) : option nat := match po with | None => None | Some (n, m) => Some (n + m) end. (** * Lists *) (* For functional programming (as in Coq), the king of all generic data * structures is the *list*. *) Inductive list (A : Set) : Set := | nil | cons (hd : A) (tl : list A). Arguments nil {A}. (* [nil] is the empty list, while [cons], standing for "construct," extends a * list of length [n] into one of length [n+1]. *) (* Here are some simple lists. *) Example nats0 : list nat := nil. Example nats1 : list nat := cons 1 nil. Example nats2 : list nat := cons 1 (cons 2 nil). (* Coq features a wonderful notation system, to help us write more concise and * readable code after introducing new syntactic forms. We will not give a * systematic presentation of the notation system, but we will show many * examples, from which it is possible to infer generality by scientific * induction. And, of course, the interested reader can always check the * notations chapter of the Coq reference manual. *) (* First, our examples can get more readable with an infix operator for [cons]. *) Infix "::" := cons. Example nats1' : list nat := 1 :: nil. Example nats2' : list nat := 1 :: 2 :: nil. (* Getting even more fancy, we declare a notation for list literals. *) Notation "[ ]" := nil. Notation "[ x1 ; .. ; xN ]" := (cons x1 (.. (cons xN nil) ..)). Example nats0'' : list nat := []. Example nats1'' : list nat := [1]. Example nats2'' : list nat := [1; 2]. Example nats3'' : list nat := [1; 2; 3]. (* Here are some classic recursive functions that operate over lists. * First, here is how to compute the length of a list. We put *implicit* * function arguments in curly braces, asking Coq to infer them at call * sites. *) Fixpoint length {A} (ls : list A) : nat := match ls with | nil => 0 | _ :: ls' => 1 + length ls' end. (* Here are two classic recursive functions on lists. *) (* First, concatenation: *) Fixpoint app {A} (ls1 ls2 : list A) : list A := match ls1 with | nil => ls2 | x :: ls1' => x :: app ls1' ls2 end. Infix "++" := app. (* Next, reversal: *) Fixpoint rev {A} (ls : list A) : list A := match ls with | nil => nil | x :: ls' => rev ls' ++ [x] end. Theorem length_app : forall A (ls1 ls2 : list A), length (ls1 ++ ls2) = length ls1 + length ls2. Proof. induct ls1; simplify; equality. Qed. (* One of the classic gotchas in functional-programming class is how slow this * naive [rev] is. Each [app] operation requires linear time, so running * linearly many [app]s brings us to quadratic time for [rev]. Using a helper * function, we can bring [rev] to its optimal linear time. *) Fixpoint rev_append {A} (ls acc : list A) : list A := match ls with | nil => acc | x :: ls' => rev_append ls' (x :: acc) end. (* This function [rev_append] takes an extra *accumulator* argument, in which we * gradually build up the original input in reversed order. The base case just * returns the accumulator. Now reversal just needs to do a [rev_append] with * an empty initial accumulator. *) Definition rev' {A} (ls : list A) : list A := rev_append ls []. (* A few test cases can help convince us that this seems to work. *) Compute rev [1; 2; 3; 4]. Compute rev' [1; 2; 3; 4]. Compute rev ["hi"; "bye"; "sky"]. Compute rev' ["hi"; "bye"; "sky"]. (* OK, great. Now it seems worth investing in a correctness proof. We'll * discover it interactively in class, but here's a worked-out final * answer, with the several lemmas that we discover are useful. *) (* List concatenation is associative. *) Lemma app_assoc : forall A (ls1 ls2 ls3 : list A), (ls1 ++ ls2) ++ ls3 = ls1 ++ (ls2 ++ ls3). Proof. induct ls1; simplify. equality. rewrite IHls1. equality. Qed. (* The natural correctness condition for [rev_append]: it does what it says on * the package, combining reversal with appending! *) Lemma rev_append_ok : forall A (ls acc : list A), rev_append ls acc = rev ls ++ acc. Proof. induct ls; simplify. equality. rewrite IHls. simplify. rewrite app_assoc. simplify. equality. Qed. (* IMPORTANT NOTE: the proof above would *not* have worked with the orders of * variables [ls] and [acc] swapped in the lemma statement! Try the proof that * way to see what goes wrong. The problem is that the induction hypothesis * would be too weak. A single [acc] value would be fixed for the whole proof, * while we need [acc] to *vary* throughout the induction, by retaining a * universal quantifier for it in the IH. *) (* Concatenating the empty list has no effect. *) Lemma app_nil : forall A (ls : list A), ls ++ [] = ls. Proof. induct ls; simplify. equality. equality. Qed. (* Now we can prove equivalence of [rev'] and [rev], with no new induction. *) Theorem rev'_ok : forall A (ls : list A), rev' ls = rev ls. Proof. simplify. unfold rev'. rewrite rev_append_ok. apply app_nil. Qed. (** ** Zipping and unzipping *) (* Another classic pair of list operations is zipping and unzipping. * These functions convert between pairs of lists and lists of pairs. *) Fixpoint zip {A1 A2} (ls1 : list A1) (ls2 : list A2) : list (A1 * A2) := match ls1, ls2 with | x1 :: ls1', x2 :: ls2' => (x1, x2) :: zip ls1' ls2' | _, _ => [] end. (* Note how, when passed two lengths of different lists, [zip] drops the * mismatched suffix of the longer list. *) (* An explicit [Set] annotation is needed here, for obscure type-inference * reasons. *) Fixpoint unzip {A1 A2 : Set} (ls : list (A1 * A2)) : list A1 * list A2 := match ls with | [] => ([], []) | (x1, x2) :: ls' => let (ls1, ls2) := unzip ls' in (x1 :: ls1, x2 :: ls2) end. (* A few common-sense properties hold of these definitions. *) Theorem length_zip : forall A1 A2 (ls1 : list A1) (ls2 : list A2), length (zip ls1 ls2) = min (length ls1) (length ls2). Proof. induct ls1; simplify. linear_arithmetic. cases ls2; simplify. linear_arithmetic. rewrite IHls1. linear_arithmetic. Qed. (* We write [fst] and [snd] for the first and second projection operators on * pairs, respectively. *) Theorem length_unzip1 : forall (A1 A2 : Set) (ls : list (A1 * A2)), length (fst (unzip ls)) = length ls. Proof. induct ls; simplify. equality. cases hd. (* Note that [cases] allows us to pull apart a pair into its two pieces. *) cases (unzip ls). simplify. equality. Qed. Theorem length_unzip2 : forall (A1 A2 : Set) (ls : list (A1 * A2)), length (snd (unzip ls)) = length ls. Proof. induct ls; simplify. equality. cases hd. cases (unzip ls). simplify. equality. Qed. Theorem zip_unzip : forall (A1 A2 : Set) (ls : list (A1 * A2)), (let (ls1, ls2) := unzip ls in zip ls1 ls2) = ls. Proof. induct ls; simplify. equality. cases hd. cases (unzip ls). simplify. equality. Qed. (* There are also interesting interactions with [app] and [rev]. *) Theorem unzip_app : forall (A1 A2 : Set) (x y : list (A1 * A2)), unzip (x ++ y) = (let (x1, x2) := unzip x in let (y1, y2) := unzip y in (x1 ++ y1, x2 ++ y2)). Proof. induct x; simplify. cases (unzip y). equality. cases hd. cases (unzip x). simplify. rewrite IHx. cases (unzip y). equality. Qed. Theorem unzip_rev : forall (A1 A2 : Set) (ls : list (A1 * A2)), unzip (rev ls) = (let (ls1, ls2) := unzip ls in (rev ls1, rev ls2)). Proof. induct ls; simplify. equality. cases hd. cases (unzip ls). simplify. rewrite unzip_app. rewrite IHls. simplify. equality. Qed. (** * Binary trees *) (* Another classic datatype is binary trees, which we can define like so. *) Inductive tree (A : Set) : Set := | Leaf | Node (l : tree A) (d : A) (r : tree A). Arguments Leaf {A}. Example tr1 : tree nat := Node (Node Leaf 7 Leaf) 8 (Node Leaf 9 (Node Leaf 10 Leaf)). (* There is a natural notion of size of a tree. *) Fixpoint size {A} (t : tree A) : nat := match t with | Leaf => 0 | Node l _ r => 1 + size l + size r end. (* There is also a natural sense of reversing a tree, flipping it around its * vertical axis. *) Fixpoint reverse {A} (t : tree A) : tree A := match t with | Leaf => Leaf | Node l d r => Node (reverse r) d (reverse l) end. (* There is a natural relationship between the two. *) Theorem size_reverse : forall A (t : tree A), size (reverse t) = size t. Proof. induct t; simplify. equality. linear_arithmetic. Qed. (* Another classic tree operation is flattening into lists. *) Fixpoint flatten {A} (t : tree A) : list A := match t with | Leaf => [] | Node l d r => flatten l ++ d :: flatten r end. (* Note here that operators [++] and [::] are right-associative. *) Theorem length_flatten : forall A (t : tree A), length (flatten t) = size t. Proof. induct t; simplify. equality. rewrite length_app. simplify. linear_arithmetic. Qed. Lemma rev_app : forall A (ls1 ls2 : list A), rev (ls1 ++ ls2) = rev ls2 ++ rev ls1. Proof. induct ls1; simplify. rewrite app_nil. equality. rewrite IHls1. apply app_assoc. Qed. Theorem rev_flatten : forall A (t : tree A), rev (flatten t) = flatten (reverse t). Proof. induct t; simplify. equality. rewrite rev_app. simplify. rewrite app_assoc. simplify. equality. Qed. (** * Syntax trees *) (* Trees are particularly important to us in studying program proof, since it is * natural to represent programs as *syntax trees*. Here's a quick example, for * a tiny imperative language. *) Inductive expression : Set := | Const (n : nat) | Var (x : var) | Plus (e1 e2 : expression) | Minus (e1 e2 : expression) | Times (e1 e2 : expression) | GreaterThan (e1 e2 : expression) | Not (e : expression). Inductive statement : Set := | Assign (x : var) (e : expression) | Sequence (s1 s2 : statement) | IfThenElse (e : expression) (s1 s2 : statement) | WhileLoop (e : expression) (s : statement). (* First, here's a quick sample of nifty notations to write * almost-natural-looking embedded programs in Coq. *) Coercion Const : nat >-> expression. Coercion Var : string >-> expression. Infix "+" := Plus : embedded_scope. Infix "-" := Minus : embedded_scope. Infix "*" := Times : embedded_scope. Infix ">" := GreaterThan : embedded_scope. Infix "<-" := Assign (at level 75) : embedded_scope. Infix ";" := Sequence (at level 76) : embedded_scope. Notation "'If' e {{ s1 }} 'else' {{ s2 }}" := (IfThenElse e s1 s2) (at level 75) : embedded_scope. Notation "'While' e {{ s }}" := (WhileLoop e s) (at level 75) : embedded_scope. Delimit Scope embedded_scope with embedded. Example factorial := ("answer" <- 1; While ("input" > 0) {{ "answer" <- "answer" * "input"; "input" <- "input" - 1 }})%embedded. (* A variety of compiler-style operations can be coded on top of this type. * Here's one to count total variable occurrences. *) Fixpoint varsInExpression (e : expression) : nat := match e with | Const _ => 0 | Var _ => 1 | Plus e1 e2 | Minus e1 e2 | Times e1 e2 | GreaterThan e1 e2 => varsInExpression e1 + varsInExpression e2 | Not e1 => varsInExpression e1 end. Fixpoint varsInStatement (s : statement) : nat := match s with | Assign _ e => 1 + varsInExpression e | Sequence s1 s2 => varsInStatement s1 + varsInStatement s2 | IfThenElse e s1 s2 => varsInExpression e + varsInStatement s1 + varsInStatement s2 | WhileLoop e s1 => varsInExpression e + varsInStatement s1 end. (* We will need to wait for a few more lectures' worth of conceptual progress * before we can prove that transformations on programs preserve meaning, but we * do already have enough tools that prove that transformations preserve more * basic properties, like number of variables. Here's one such transformation, * which flips "then" and "else" cases while also negating "if" conditions. *) Fixpoint flipper (s : statement) : statement := match s with | Assign _ _ => s | Sequence s1 s2 => Sequence (flipper s1) (flipper s2) | IfThenElse e s1 s2 => IfThenElse (Not e) (flipper s2) (flipper s1) | WhileLoop e s1 => WhileLoop e (flipper s1) end. Theorem varsIn_flipper : forall s, varsInStatement (flipper s) = varsInStatement s. Proof. induct s; simplify. equality. equality. linear_arithmetic. equality. Qed. (* Just for the sheer madcap fun of it, let's write some translations of * programs into our lists from before, with variables as data values. *) Fixpoint listifyExpression (e : expression) : list var := match e with | Const _ => [] | Var x => [x] | Plus e1 e2 | Minus e1 e2 | Times e1 e2 | GreaterThan e1 e2 => listifyExpression e1 ++ listifyExpression e2 | Not e1 => listifyExpression e1 end. Fixpoint listifyStatement (s : statement) : list var := match s with | Assign x e => x :: listifyExpression e | Sequence s1 s2 => listifyStatement s1 ++ listifyStatement s2 | IfThenElse e s1 s2 => listifyExpression e ++ listifyStatement s1 ++ listifyStatement s2 | WhileLoop e s1 => listifyExpression e ++ listifyStatement s1 end. Compute listifyStatement factorial. (* At this point, I can't resist switching to a more automated proof style, * though still a pretty tame one. *) Local Hint Rewrite length_app. Lemma length_listifyExpression : forall e, length (listifyExpression e) = varsInExpression e. Proof. induct e; simplify; linear_arithmetic. Qed. Local Hint Rewrite length_listifyExpression. Theorem length_listifyStatement : forall s, length (listifyStatement s) = varsInStatement s. Proof. induct s; simplify; linear_arithmetic. Qed. (* Other transformations are also possible, like the Swedish-Chef optimization, * which turns every variable into "bork". It saves many bits when most variable * names are longer than 4 characters. *) Fixpoint swedishExpression (e : expression) : expression := match e with | Const _ => e | Var _ => Var "bork" | Plus e1 e2 => Plus (swedishExpression e1) (swedishExpression e2) | Minus e1 e2 => Minus (swedishExpression e1) (swedishExpression e2) | Times e1 e2 => Times (swedishExpression e1) (swedishExpression e2) | GreaterThan e1 e2 => GreaterThan (swedishExpression e1) (swedishExpression e2) | Not e1 => Not (swedishExpression e1) end. Fixpoint swedishStatement (s : statement) : statement := match s with | Assign _ e => Assign "bork" (swedishExpression e) | Sequence s1 s2 => Sequence (swedishStatement s1) (swedishStatement s2) | IfThenElse e s1 s2 => IfThenElse (swedishExpression e) (swedishStatement s1) (swedishStatement s2) | WhileLoop e s1 => WhileLoop (swedishExpression e) (swedishStatement s1) end. Compute swedishStatement factorial. Fixpoint swedishList (ls : list var) : list var := match ls with | [] => [] | _ :: ls => "bork" :: swedishList ls end. Lemma swedishList_app : forall ls1 ls2, swedishList (ls1 ++ ls2) = swedishList ls1 ++ swedishList ls2. Proof. induct ls1; simplify; equality. Qed. Local Hint Rewrite swedishList_app. Lemma listifyExpression_swedishExpression : forall e, listifyExpression (swedishExpression e) = swedishList (listifyExpression e). Proof. induct e; simplify; equality. Qed. Local Hint Rewrite listifyExpression_swedishExpression. Lemma listifyStatement_swedishStatement : forall s, listifyStatement (swedishStatement s) = swedishList (listifyStatement s). Proof. induct s; simplify; equality. Qed.