(** Formal Reasoning About Programs * Chapter 19: Operational Semantics for Shared-Memory Concurrency * Author: Adam Chlipala * License: https://creativecommons.org/licenses/by-nc-nd/4.0/ *) Require Import Frap. Set Implicit Arguments. Set Asymmetric Patterns. (** * Shared notations and definitions; main material starts afterward. *) Notation "m $! k" := (match m $? k with Some n => n | None => O end) (at level 30). Definition heap := fmap nat nat. Local Hint Extern 1 (_ <= _) => linear_arithmetic : core. Local Hint Extern 1 (@eq nat _ _) => linear_arithmetic : core. (** * An object language with shared-memory concurrency *) (* We're going to start investigating how to verify concurrent programs whose * behavior is given with operational semantics. There are a variety of * different concurrency styles out there, with their distinctive practical and * theoretical benefits; we'll start with the most venerable style, shared * memory. *) (* We'll build on the mixed-embedding languages from the last two chapters. * Let's simplify the encoding by only working with commands that generate * [nat]. *) Inductive cmd := | Return (r : nat) | Bind (c1 : cmd) (c2 : nat -> cmd) | Read (a : nat) | Write (a v : nat) | Fail (* Now here's the new part: parallel composition of commands. *) | Par (c1 c2 : cmd) (* Let's also add locking commands, where locks are named by [nat]s. *) | Lock (a : nat) | Unlock (a : nat). Notation "x <- c1 ; c2" := (Bind c1 (fun x => c2)) (right associativity, at level 80). Infix "||" := Par. (* As the program runs, it has not just a heap but also a set of locks that are * taken at that moment. *) Definition locks := set nat. (* The first few rules below are basically the same as in last chapter, except * that we relax the restriction on only reading/writing addresses that are * explicitly mapped into the heap. *) Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop := | StepBindRecur : forall c1 c1' c2 h h' l l', step (h, l, c1) (h', l', c1') -> step (h, l, Bind c1 c2) (h', l', Bind c1' c2) | StepBindProceed : forall v c2 h l, step (h, l, Bind (Return v) c2) (h, l, c2 v) | StepRead : forall h l a, step (h, l, Read a) (h, l, Return (h $! a)) | StepWrite : forall h l a v, step (h, l, Write a v) (h $+ (a, v), l, Return 0) (* First interesting twist: we can "push steps through" the [Par] operator on * either side. The choice of a side is the sole source of nondeterminism in * this semantics, corresponding to the whims of a scheduler. *) | StepParRecur1 : forall h l c1 c2 h' l' c1', step (h, l, c1) (h', l', c1') -> step (h, l, Par c1 c2) (h', l', Par c1' c2) | StepParRecur2 : forall h l c1 c2 h' l' c2', step (h, l, c2) (h', l', c2') -> step (h, l, Par c1 c2) (h', l', Par c1 c2') (* To take a lock, it must not be held; and vice versa for releasing a lock. *) | StepLock : forall h l a, ~a \in l -> step (h, l, Lock a) (h, {a} \cup l, Return 0) | StepUnlock : forall h l a, a \in l -> step (h, l, Unlock a) (h, l \setminus {a}, Return 0). Definition trsys_of (h : heap) (l : locks) (c : cmd) := {| Initial := {(h, l, c)}; Step := step |}. (** * An example *) (* In this lecture, we'll focus on model checking as our program-proof * technique. Recall that model checking is all about reducing a problem to a * reachability question in a finite-state system. Our programs here have the * (perhaps surprising!) property that termination is guaranteed, for any * initial state, regardless of how the scheduler behaves. However, * counterintuitively, program execution does *not* need to be finite-state, * and thus amenable to model checking, though we will stick to examples that are. * Let's define a simple two-thread program and model-check it. *) (* Throughout this file, we'll only be verifying that no thread could ever reach * a [Fail] command that is next in line to execute, a property that is easy to * phrase as an invariant of the transition system. Here's how to compute * whether a command is about to fail. *) Fixpoint notAboutToFail (c : cmd) : bool := match c with | Fail => false | Bind c1 _ => notAboutToFail c1 | Par c1 c2 => notAboutToFail c1 && notAboutToFail c2 | _ => true end. Example two_increments_thread := _ <- Lock 0; n <- Read 0; if n <=? 1 then _ <- Write 0 (n + 1); Unlock 0 else Fail. Example two_increments := two_increments_thread || two_increments_thread. (* Next, we do one of our standard boring (and slow; sorry!) model-checking * proofs, where tactics explore the finite state space for us. *) Theorem two_increments_ok : invariantFor (trsys_of $0 {} two_increments) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. unfold two_increments, two_increments_thread. simplify. eapply invariant_weaken. apply multiStepClosure_ok; simplify. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_done. simplify. propositional; subst; equality. Qed. (* Notice how every step of the process needs to consider all possibilities of * threads that could run next, which, in general, gives us state spaces of size * *exponential* in the program-text length. That's really a shame from a * performance perspective, isn't it? Our goal now will be to apply * *optimizations* that show equivalence with alternative transition systems * whose state spaces are smaller. By the end, we'll be able to check * nontrivial concurrent programs by only computing state spaces with *linear* * size in program-text length! (The catch is that the technique only applies * for programs accepted by a simple static analysis.) *) (** * Optimization #1: always run all purely local actions first. *) (* Here's a function that, in a single go, performs all simplifications that are * *thread-local*. That is, no other thread can observe those steps, as they * don't touch the heap or lockset. *) Fixpoint runLocal (c : cmd) : cmd := match c with | Return _ => c | Bind c1 c2 => match runLocal c1 with | Return v => runLocal (c2 v) | c1' => Bind c1' c2 end | Read _ => c | Write _ _ => c | Fail => c | Par c1 c2 => Par (runLocal c1) (runLocal c2) | Lock _ => c | Unlock _ => c end. (* We can define an alternative step relation that always runs [runLocal] as a * kind of postprocessing on the new command. This way, the model checker won't * need to run separate exploration steps for all of those trivial * simplifications. *) Inductive stepL : heap * locks * cmd -> heap * locks * cmd -> Prop := | StepL : forall h l c h' l' c', step (h, l, c) (h', l', c') -> stepL (h, l, c) (h', l', runLocal c'). Definition trsys_ofL (h : heap) (l : locks) (c : cmd) := {| Initial := {(h, l, runLocal c)}; Step := stepL |}. (* Now we prove some basic facts; commentary resumes before [step_runLocal]. *) Local Hint Constructors step stepL : core. Lemma run_Return : forall h l r h' l' c, step^* (h, l, Return r) (h', l', c) -> h' = h /\ l' = l /\ c = Return r. Proof. induct 1; eauto. invert H. Qed. Lemma run_Bind : forall h l c1 c2 h' l' c', step^* (h, l, Bind c1 c2) (h', l', c') -> (exists c1', step^* (h, l, c1) (h', l', c1') /\ c' = Bind c1' c2) \/ (exists h'' l'' r, step^* (h, l, c1) (h'', l'', Return r) /\ step^* (h'', l'', c2 r) (h', l', c')). Proof. induct 1; eauto. invert H; eauto 10. Ltac inst H := repeat match type of H with | _ = _ -> _ => specialize (H eq_refl) | forall x : ?T, _ => let y := fresh in evar (y : T); let y' := eval unfold y in y in specialize (H y'); clear y end. inst IHtrc. first_order; eauto 10. Qed. Lemma StepBindRecur_star : forall c1 c1' c2 h h' l l', step^* (h, l, c1) (h', l', c1') -> step^* (h, l, Bind c1 c2) (h', l', Bind c1' c2). Proof. induct 1; eauto. cases y. cases p. eauto. Qed. Lemma StepParRecur1_star : forall h l c1 c2 h' l' c1', step^* (h, l, c1) (h', l', c1') -> step^* (h, l, Par c1 c2) (h', l', Par c1' c2). Proof. induct 1; eauto. cases y. cases p. eauto. Qed. Lemma StepParRecur2_star : forall h l c1 c2 h' l' c2', step^* (h, l, c2) (h', l', c2') -> step^* (h, l, Par c1 c2) (h', l', Par c1 c2'). Proof. induct 1; eauto. cases y. cases p. eauto. Qed. Local Hint Resolve StepBindRecur_star StepParRecur1_star StepParRecur2_star : core. Lemma runLocal_idem : forall c, runLocal (runLocal c) = runLocal c. Proof. induct c; simplify; eauto. cases (runLocal c); simplify; eauto. rewrite IHc; auto. rewrite IHc; auto. equality. Qed. (* The key correctness property: when an original step takes place, either it * has no effect or can be duplicated when we apply [runLocal] both *before* and * *after* the step. *) Lemma step_runLocal : forall h l c h' l' c', step (h, l, c) (h', l', c') -> (runLocal c = runLocal c' /\ h = h' /\ l = l') \/ exists c'', step (h, l, runLocal c) (h', l', c'') /\ runLocal c'' = runLocal c'. Proof. induct 1; simplify; first_order; eauto. rewrite H0; equality. cases (runLocal c1). invert H0. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite <- H1; eauto. rewrite H0; equality. right; eexists; propositional. eauto. simplify. rewrite runLocal_idem; equality. equality. right; eexists; propositional. eauto. simplify. rewrite runLocal_idem; equality. Qed. (* That was the main punchline. Commentary resumes at [step_stepL]. *) Lemma step_stepL' : forall h l c h' l' c', step^* (h, l, c) (h', l', c') -> stepL^* (h, l, runLocal c) (h', l', runLocal c'). Proof. induct 1; simplify; eauto. cases y. cases p. inst IHtrc. apply step_runLocal in H; first_order; subst. rewrite H; eauto. econstructor. econstructor. eauto. equality. Qed. Theorem notAboutToFail_runLocal : forall c, notAboutToFail (runLocal c) = true -> notAboutToFail c = true. Proof. induct c; simplify; auto. cases (runLocal c); simplify; auto. cases (runLocal c1); simplify; auto; propositional; repeat match goal with | [ H : _ |- _ ] => apply andb_true_iff in H; propositional | [ H : _ = _ |- _ ] => rewrite H end; try equality. Qed. (* The key proof principle: to verify a can-never-fail invariant for the * original semantics, it suffices to verify it for the new semantics * instead. *) Theorem step_stepL : forall h l c , invariantFor (trsys_ofL h l c) (fun p => let '(_, _, c) := p in notAboutToFail c = true) -> invariantFor (trsys_of h l c) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. unfold invariantFor; simplify. propositional; subst. cases s'. cases p. apply step_stepL' in H1. apply H in H1; eauto using notAboutToFail_runLocal. Qed. (* Now watch as we verify that last example in fewer steps, with a smaller * invariant! *) Theorem two_increments_ok_again : invariantFor (trsys_of $0 {} two_increments) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. apply step_stepL. unfold two_increments, two_increments_thread. simplify. eapply invariant_weaken. apply multiStepClosure_ok; simplify. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_done. simplify. propositional; subst; equality. Qed. (** * Optimization #2: partial-order reduction *) (* There was a key property lurking behind the soundness proof of our last * optimization: *commutativity*, one of the most common ways to tame the * state-space explosion from concurrency scheduling. Specifically, the local * steps performed by [runLocal] all *commute* with any steps taken in other * threads, because they are agnostic to shared state. Can we generalize the * technique to also harness commutativity of operations that *do* depend on the * shared state, but in particular controlled ways? Why, yes we can! The most * popular such technique from the model-checking world is * *partial-order reduction*. *) (* First, here's an example where we should be able to do better than allowing * either thread to run in every step, as we model-check. *) Example independent_threads := (a <- Read 0; _ <- Write 1 (a + 1); a <- Read 1; if a ==n 1 then Return 0 else Fail) || (b <- Read 2; Write 2 (b + 1)). (* Unfortunately, our existing model-checker does in fact follow the * "exponential" strategy to build the state space. *) Theorem independent_threads_ok : invariantFor (trsys_of $0 {} independent_threads) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. apply step_stepL. unfold independent_threads. simplify. eapply invariant_weaken. apply multiStepClosure_ok; simplify. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_step. model_check_done. simplify. propositional; subst; equality. Qed. (* It turns out that we can actually do model-checking where at each point we * only explore the result of running *the first thread that is ready*! Such a * strategy isn't sound for all programs, but it is for our example here. Why? * Every pair of atomic actions between threads *commutes*. That is, neither * one affects whether the other is enabled to execute (the way that one [Lock] * can disable another), and running the two actions in either order modifies * shared state identically. In such a case, we may always pick our favorite * thread to step next. *) (* To make all that formal, we will do some static program analysis to summarize * which atomic actions a thread might take. *) Record summary := { Reads : set nat; Writes : set nat; Locks : set nat }. (* Here is a relation to check the accuracy of a summary. *) Inductive summarize : cmd -> summary -> Prop := | SumReturn : forall r s, summarize (Return r) s | SumFail : forall s, summarize Fail s | SumBind : forall c1 c2 s, summarize c1 s -> (forall r, summarize (c2 r) s) (* Note the approximation here: we consider all [r] values, even if some * are semantically impossible. *) -> summarize (Bind c1 c2) s | SumRead : forall a s, a \in s.(Reads) -> summarize (Read a) s | SumWrite : forall a v s, a \in s.(Writes) -> summarize (Write a v) s | SumLock : forall a s, a \in s.(Locks) -> summarize (Lock a) s | SumUnlock : forall a s, a \in s.(Locks) -> summarize (Unlock a) s | SumPar : forall c1 c2 s, summarize c1 s -> summarize c2 s -> summarize (Par c1 c2) s. (* To check commutativity, it is helpful to know which atomic command a thread * could run next. We skip coverage of parallel compositions to make things * simple. *) Inductive nextAction : cmd -> cmd -> Prop := | NaReturn : forall r, nextAction (Return r) (Return r) | NaFail : nextAction Fail Fail | NaRead : forall a, nextAction (Read a) (Read a) | NaWrite : forall a v, nextAction (Write a v) (Write a v) | NaLock : forall l, nextAction (Lock l) (Lock l) | NaUnlock : forall l, nextAction (Unlock l) (Unlock l) | NaBind : forall c1 c2 c, nextAction c1 c -> nextAction (Bind c1 c2) c. (* We can succinctly capture which summaries describe threads that will commute * with a particular atomic action. The guarantee applies not just to the * thread's first action but also to all others that it might reach later in * execution. *) Definition commutes (c : cmd) (s : summary) : Prop := match c with | Return _ => True | Fail => True | Read a => ~a \in s.(Writes) | Write a _ => ~a \in s.(Reads) \cup s.(Writes) | Lock a => ~a \in s.(Locks) | Unlock a => ~a \in s.(Locks) | _ => False end. (* Now the new semantics: *) Inductive stepC (s : summary) : heap * locks * cmd -> heap * locks * cmd -> Prop := (* It is always OK to let the first thread run. *) | StepFirst : forall h l c1 h' l' c1' c2, step (h, l, c1) (h', l', c1') -> stepC s (h, l, c1 || c2) (h', l', c1' || c2) (* However, you may only pick another thread to run if it would be unsound to * consider just the first thread. The negation of the soundness condition is * expressed in the first premise below. *) | StepAny : forall h l c2 h' l' c2' c1, (forall c0 h'' l'' c1'', nextAction c1 c0 (* The first thread [c] has some atomic action [c0] * ready to run. *) -> commutes c0 s (* All other threads only contain actions that commute * with [c0]. *) -> step (h, l, c1) (h'', l'', c1'') (* Finally, [c] is actually enabled to run, which * might not be the case if [c0] is a locking * command. *) -> False) (* If we passed that check, then we can step a single thread as expected! *) -> step (h, l, c2) (h', l', c2') -> stepC s (h, l, c1 || c2) (h', l', c1 || c2'). (* Notice how this definition turns the partial-order-reduction optimization * "off and on" during state-space exploration. We only restrict our attention * to the first thread so long as the soundness condition above is true. *) Definition trsys_ofC (s : summary) (h : heap) (l : locks) (c : cmd) := {| Initial := {(h, l, c)}; Step := stepC s |}. (* Now we come to quite a few fairly complex lemmas. * First, [commutes] really does allow other commands to swap order with the * atomic action in question. *) Lemma commutes_sound' : forall h l c2 h' l' c2', step (h, l, c2) (h', l', c2') -> forall s c1 h'' l'' c1', step (h', l', c1) (h'', l'', c1') -> summarize c2 s -> commutes c1 s -> exists h1 l1, step (h, l, c1) (h1, l1, c1') /\ step (h1, l1, c2) (h'', l'', c2'). Proof. induct 1; simplify; eauto. invert H1. eapply IHstep in H0; eauto; first_order. eauto. invert H0; invert H; simplify; propositional; eauto. do 2 eexists; propositional. eauto. assert (a <> a0) by sets. replace (h' $! a) with (h' $+ (a0, v) $! a) by (simplify; equality). eauto. invert H0; invert H; simplify; propositional; eauto. do 2 eexists; propositional. eauto. assert (a <> a0) by sets. replace (h $+ (a, v) $+ (a0, v0)) with (h $+ (a0, v0) $+ (a, v)) by maps_equal. eauto. invert H1. eapply IHstep in H0; clear IHstep; eauto. first_order. eauto 10. invert H1. eapply IHstep in H0; clear IHstep; eauto. first_order. eauto 10. invert H1; invert H0; simplify; propositional; eauto. do 2 eexists; propositional. constructor. sets. replace ({a0, a} \cup l) with ({a} \cup ({a0} \cup l)) by sets. constructor. sets. do 2 eexists; propositional. constructor. sets; propositional. replace ({a} \cup l \setminus {a0}) with ({a} \cup (l \setminus {a0})) by sets. constructor. sets. invert H1; invert H0; simplify; propositional; eauto. do 2 eexists; propositional. constructor. sets. replace ({a0} \cup (l \setminus {a})) with (({a0} \cup l) \setminus {a}) by sets. constructor. sets. do 2 eexists; propositional. constructor. sets; propositional. replace ((l \setminus {a}) \setminus {a0}) with ((l \setminus {a0}) \setminus {a}) by sets. constructor. sets. Qed. (* Commentary now resumes at [commutes_sound]. *) Lemma step_nextAction_Return : forall r h l c h' l' c', step (h, l, c) (h', l', c') -> nextAction c (Return r) -> h' = h /\ l' = l /\ (forall h'' l'', step (h'', l'', c) (h'', l'', c')). Proof. induct 1; invert 1; propositional; eauto. Qed. Lemma step_nextAction_other : forall c0 h l c h' l' c', step (h, l, c) (h', l', c') -> nextAction c c0 -> (forall r, c0 <> Return r) -> exists f c0', step (h, l, c0) (h', l', c0') /\ c = f c0 /\ c' = f c0' /\ forall h1 l1 h2 l2 c0'', step (h1, l1, c0) (h2, l2, c0'') -> step (h1, l1, f c0) (h2, l2, f c0''). Proof. induct 1; invert 1; first_order; subst; eauto. exists (fun X => x <- x X; c2 x); eauto 10. invert H3. unfold not in *; exfalso; eauto. exists (fun X => X); eauto. exists (fun X => X); eauto. exists (fun X => X); eauto 10. exists (fun X => X); eauto 10. Qed. Lemma nextAction_couldBe : forall c c0, nextAction c c0 -> match c0 with | Return _ => True | Fail => True | Read _ => True | Write _ _ => True | Lock _ => True | Unlock _ => True | _ => False end. Proof. induct 1; auto. Qed. (* [commutes] allows order-swapping even when the atomic action is embedded * further within the structure of a larger command. *) Lemma commutes_sound : forall h l c2 h' l' c2', step (h, l, c2) (h', l', c2') -> forall s c1 c0 h'' l'' c1', step (h', l', c1) (h'', l'', c1') -> summarize c2 s -> nextAction c1 c0 -> commutes c0 s -> exists h1 l1, step (h, l, c1) (h1, l1, c1') /\ step (h1, l1, c2) (h'', l'', c2'). Proof. simplify. assert (Hc : commutes c0 s) by assumption. specialize (nextAction_couldBe H2). cases c0; propositional. assert (Hs : step (h', l', c1) (h'', l'', c1')) by assumption. eapply step_nextAction_Return in H0; eauto; propositional; subst. eauto. eapply step_nextAction_other in H0; eauto; first_order; subst; try equality. eapply commutes_sound' with (c2 := c2) (c1 := Read a) in H3; eauto. first_order; eauto. eapply step_nextAction_other in H0; eauto; first_order; subst; try equality. eapply commutes_sound' with (c2 := c2) (c1 := Write a v) in H; eauto; try solve [ simplify; sets ]. first_order; eauto. eapply step_nextAction_other in H0; eauto; first_order; subst; try equality. invert H0. eapply step_nextAction_other in H0; eauto; first_order; subst; try equality. eapply commutes_sound' with (c2 := c2) (c1 := Lock a) in H3; eauto. first_order; eauto. eapply step_nextAction_other in H0; eauto; first_order; subst; try equality. eapply commutes_sound' with (c2 := c2) (c1 := Unlock a) in H3; eauto. first_order; eauto. Qed. Local Hint Constructors summarize : core. (* The next two lemmas show that, once a summary is accurate for a command, it * remains accurate throughout the whole execution lifetime of the command. *) Lemma summarize_step : forall h l c h' l' c' s, step (h, l, c) (h', l', c') -> summarize c s -> summarize c' s. Proof. induct 1; invert 1; simplify; eauto. Qed. Lemma summarize_steps : forall h l c h' l' c' s, step^* (h, l, c) (h', l', c') -> summarize c s -> summarize c' s. Proof. induct 1; eauto. cases y. cases p. eauto using summarize_step. Qed. (* The next technical device will require that we bound how many steps of * execution particular commands could run for. We use a conservative * overapproximation that is easy to compute, phrased as a relation. *) Inductive boundRunningTime : cmd -> nat -> Prop := | BrtReturn : forall r n, boundRunningTime (Return r) n | BrtFail : forall n, boundRunningTime Fail n | BrtRead : forall a n, boundRunningTime (Read a) (S n) | BrtWrite : forall a v n, boundRunningTime (Write a v) (S n) | BrtLock : forall a n, boundRunningTime (Lock a) (S n) | BrtUnlock : forall a n, boundRunningTime (Unlock a) (S n) | BrtBind : forall c1 c2 n1 n2, boundRunningTime c1 n1 -> (forall r, boundRunningTime (c2 r) n2) -> boundRunningTime (Bind c1 c2) (S (n1 + n2)) | BrtPar : forall c1 c2 n1 n2, boundRunningTime c1 n1 -> boundRunningTime c2 n2 -> boundRunningTime (Par c1 c2) (S (n1 + n2)). (* The extra [S] here may seem superfluous, but it helps make a certain * induction well-founded. *) (* Perhaps surprisingly, there exist commands that have no finite time bounds! * Mixed-embedding languages often have these counterintuitive properties. *) Fixpoint scribbly (n : nat) : cmd := match n with | O => Return 0 | S n' => _ <- Write n' 0; scribbly n' end. Lemma scribbly_time : forall n m, boundRunningTime (scribbly n) m -> m >= n. Proof. induct n; invert 1; auto. invert H2. specialize (H4 n0). apply IHn in H4. linear_arithmetic. Qed. Theorem boundRunningTime_not_total : exists c, forall n, ~boundRunningTime c n. Proof. exists (n <- Read 0; scribbly n); propositional. invert H. specialize (H4 (S n2)). apply scribbly_time in H4. linear_arithmetic. Qed. Local Hint Constructors boundRunningTime : core. (* Key property: taking a step of execution lowers the running-time bound. *) Lemma boundRunningTime_step : forall c n h l h' l', boundRunningTime c n -> forall c', step (h, l, c) (h', l', c') -> exists n', boundRunningTime c' n' /\ n' < n. Proof. induct 1; invert 1; simplify; eauto. apply IHboundRunningTime in H4; first_order; subst. eexists; propositional. eauto. linear_arithmetic. apply IHboundRunningTime1 in H3; first_order; subst. eauto 6. apply IHboundRunningTime2 in H3; first_order; subst. eauto 6. Qed. Lemma boundRunningTime_steps : forall h l c h' l' c', step^* (h, l, c) (h', l', c') -> forall n, boundRunningTime c n -> exists n', boundRunningTime c' n' /\ n' <= n. Proof. induct 1; simplify; eauto. cases y. cases p. specialize (boundRunningTime_step H1 H); first_order. eapply IHtrc in H2; eauto. first_order. eauto. Qed. (* Here we get a bit naughty and begin to depend on *classical logic*, as with * the *law of the excluded middle*: [forall P, P \/ ~P]. You may not have * noticed that we've rarely applied that principle explicitly so far! *) (* A very useful property: when a command has bounded running time, any * execution starting from that command can be *completed* to one ending in a * stuck state. This property definitely wouldn't be true without the bound, * if our language had explicit, unbounded loops. * * The fun thing about this proof is that we are essentially using tactics to * define an interpreter for the object language, making arbitrary scheduling * choices. Implicit in the derivation is a proof that this interpreter always * terminates, which we get by strong induction on the running-time bound. *) Theorem complete_trace : forall k c n, boundRunningTime c n -> n <= k -> forall h l, exists h' l' c', step^* (h, l, c) (h', l', c') /\ (forall h'' l'' c'', step (h', l', c') (h'', l'', c'') -> False). Proof. induct k; simplify. invert H; try linear_arithmetic. do 3 eexists; propositional. eauto. invert H. do 3 eexists; propositional. eauto. invert H. invert H. do 3 eexists; propositional. eauto. invert H. do 3 eexists; propositional. eauto. invert H. do 3 eexists; propositional. apply trc_one. eauto. invert H. do 3 eexists; propositional. apply trc_one. eauto. invert H. excluded_middle (a \in l). do 3 eexists; propositional. eauto. invert H1. sets. do 3 eexists; propositional. apply trc_one. eauto. invert H1. excluded_middle (a \in l). do 3 eexists; propositional. apply trc_one. eauto. invert H1. do 3 eexists; propositional. eauto. invert H1. sets. eapply IHk in H1; eauto; first_order. cases x1. specialize (H2 r). eapply IHk in H2; eauto; first_order. do 3 eexists; propositional. eapply trc_trans. apply StepBindRecur_star. eassumption. eapply TrcFront. eauto. eassumption. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. do 3 eexists; propositional. apply StepBindRecur_star. eassumption. invert H3. eauto. assert (Hb1 : boundRunningTime c1 n1) by assumption. assert (Hb2 : boundRunningTime c2 n2) by assumption. eapply IHk in H1; try linear_arithmetic; first_order. invert H. eapply IHk in H2; try linear_arithmetic; first_order. invert H. do 3 eexists; propositional. eauto. invert H; eauto. cases y. cases p. specialize (boundRunningTime_step Hb2 H3); first_order. assert (boundRunningTime (Par x1 c) (S (n1 + x3))) by eauto. eapply IHk in H6; try linear_arithmetic; first_order. do 3 eexists; propositional. eapply TrcFront. eauto. eassumption. eauto. cases y. cases p. specialize (boundRunningTime_step Hb1 H3); first_order. assert (boundRunningTime (Par c c2) (S (x2 + n2))) by eauto. eapply IHk in H6; try linear_arithmetic; first_order. do 3 eexists; propositional. eapply TrcFront. eauto. eassumption. eauto. Qed. (* We will apply completion to traces that end in violation of the * not-about-to-fail invariant. It is important that any extension of such a * trace preserves that property. *) Lemma notAboutToFail_step : forall h l c h' l' c', step (h, l, c) (h', l', c') -> notAboutToFail c = false -> notAboutToFail c' = false. Proof. induct 1; simplify; eauto; try equality. apply andb_false_iff in H0. apply andb_false_iff. propositional. apply andb_false_iff in H0. apply andb_false_iff. propositional. Qed. Lemma notAboutToFail_steps : forall h l c h' l' c', step^* (h, l, c) (h', l', c') -> notAboutToFail c = false -> notAboutToFail c' = false. Proof. induct 1; simplify; eauto. cases y. cases p. eauto using notAboutToFail_step. Qed. (* One last technical device: we define a variant of [step^*] that tracks how * many steps were made, which will come in handy for induction shortly. *) Inductive stepsi : nat -> heap * locks * cmd -> heap * locks * cmd -> Prop := | StepsiO : forall st, stepsi O st st | StepsiS : forall st1 st2 st3 i, step st1 st2 -> stepsi i st2 st3 -> stepsi (S i) st1 st3. Local Hint Constructors stepsi : core. Theorem steps_stepsi : forall st1 st2, step^* st1 st2 -> exists i, stepsi i st1 st2. Proof. induct 1; first_order; eauto. Qed. Local Hint Constructors stepC : core. (* The next few lemmas are quite technical. Commentary resumes for * [translate_trace]. *) Lemma nextAction_det : forall c c0, nextAction c c0 -> forall h l h' l' c', step (h, l, c) (h', l', c') -> forall h'' l'' c'', step (h, l, c) (h'', l'', c'') -> h' = h'' /\ l'' = l' /\ c'' = c'. Proof. induct 1; invert 1; invert 1; auto. eapply IHnextAction in H2; eauto. equality. invert H2. invert H2. Qed. Lemma commute_writes : forall c1 c a s h l1' h' l' v, nextAction c1 c -> a \in Writes s -> commutes c s -> forall c1', step (h, l1', c1) (h', l', c1') -> step (h $+ (a, v), l1', c1) (h' $+ (a, v), l', c1'). Proof. induct 1; simplify. invert H1. invert H1. invert H1. replace (h' $! a0) with (h' $+ (a, v) $! a0). eauto. assert (a <> a0) by sets. simplify; equality. invert H1. replace (h $+ (a0, v0) $+ (a, v)) with (h $+ (a, v) $+ (a0, v0)). eauto. assert (a <> a0) by sets. maps_equal. invert H1. eauto. invert H1. eauto. invert H2; eauto. Qed. Lemma commute_locks : forall c1 c a s h l1' h' l', nextAction c1 c -> a \in Locks s -> commutes c s -> forall c1', step (h, l1', c1) (h', l', c1') -> step (h, {a} \cup l1', c1) (h', {a} \cup l', c1'). Proof. induct 1; simplify. invert H1. invert H1. invert H1; eauto. invert H1; eauto. invert H1. replace ({a} \cup ({l} \cup l1')) with ({l} \cup ({a} \cup l1')) by sets. constructor. sets. invert H1. replace ({a} \cup (l1' \setminus {l})) with (({a} \cup l1') \setminus {l}) by sets. constructor. sets. invert H2; eauto. Qed. Lemma commute_unlocks : forall c1 c a s h l1' h' l', nextAction c1 c -> a \in Locks s -> commutes c s -> forall c1', step (h, l1', c1) (h', l', c1') -> step (h, l1' \setminus {a}, c1) (h', l' \setminus {a}, c1'). Proof. induct 1; simplify. invert H1. invert H1. invert H1; eauto. invert H1; eauto. invert H1. replace (({l} \cup l1') \setminus {a}) with ({l} \cup (l1' \setminus {a})) by sets. constructor. sets. invert H1. replace ((l1' \setminus {l}) \setminus {a}) with ((l1' \setminus {a}) \setminus {l}) by sets. constructor. sets. invert H2; eauto. Qed. Lemma commutes_noblock : forall c c0, nextAction c c0 -> forall h l h' l' c', step (h, l, c) (h', l', c') -> forall c1 s, summarize c1 s -> commutes c0 s -> forall h1' l1' c1', step (h, l, c1) (h1', l1', c1') -> exists h'' l'', step (h1', l1', c) (h'', l'', c'). Proof. induct 1; invert 1. induct 1; simplify; eauto. invert H0. invert H0. invert H3; eauto. invert H1; eauto. invert H1; eauto. assert (a0 <> a) by sets. replace (h' $! a) with (h' $+ (a0, v) $! a) by (simplify; equality). eauto. invert H1; eauto. invert H1; eauto. invert H2; eauto. induct 1; simplify; eauto. induct 1; simplify; eauto. invert H0. invert H0. invert H4; eauto. invert H2; eauto. invert H2; eauto. invert H2; eauto. do 2 eexists. constructor. sets. invert H2; eauto. do 2 eexists. constructor. sets. invert H3; eauto. induct 1; simplify; eauto. invert H0. invert H0. invert H4; eauto. invert H2; eauto. invert H2; eauto. invert H2; eauto. do 2 eexists. constructor. sets. invert H2; eauto. do 2 eexists. constructor. sets. invert H3; eauto. induct 1; simplify; eauto. invert H1. invert H1. invert H5; eauto. invert H3; eauto. invert H3. do 2 eexists; eapply commute_writes in H2; eauto. invert H3. do 2 eexists; eapply commute_locks in H2; eauto. invert H3. do 2 eexists; eapply commute_unlocks in H2; eauto. invert H1; eauto. eauto. Qed. Lemma translate_trace_commute : forall i h l c1 c2 h' l' c', stepsi (S i) (h, l, c1 || c2) (h', l', c') -> (forall h'' l'' c'', step (h', l', c') (h'', l'', c'') -> False) -> forall s, summarize c2 s -> forall x, nextAction c1 x -> commutes x s -> forall x0 x1 x2, step (h, l, c1) (x0, x1, x2) -> exists h'' l'', step (h, l, c1) (h'', l'', x2) /\ stepsi i (h'', l'', x2 || c2) (h', l', c'). Proof. induct 1; simplify. invert H0. invert H. eapply nextAction_det in H5; try eapply H6; eauto; propositional; subst. eauto 10. eapply commutes_noblock in H3; eauto. first_order. exfalso; eauto. invert H. eapply nextAction_det in H5; try eapply H12; eauto; propositional; subst. eauto 10. assert (Hnext : nextAction c1 x) by assumption. eapply commutes_noblock in Hnext; eauto. first_order. eapply IHstepsi in H; clear IHstepsi; eauto using summarize_step. first_order. eapply commutes_sound with (c1 := c1) (c2 := c2) (c0 := x) in H; eauto. first_order. eauto 10. Qed. (* The heart of the soundness proof! When a length-[i] derivation gets us to a * stuck state that is about to fail, and when we have summarized the program, * we can run that summary in the optimized semantics and also arrive at a state * that is about to fail. Thus, if we explore the optimized state space and * find no failures, we can conclude lack of reachable failures in the original * state space. *) Lemma translate_trace : forall i h l c1 c2 h' l' c', stepsi i (h, l, c1 || c2) (h', l', c') -> (forall h'' l'' c'', step (h', l', c') (h'', l'', c'') -> False) -> notAboutToFail c' = false -> forall s, summarize c2 s -> exists h' l' c'', (stepC s)^* (h, l, c1 || c2) (h', l', c'') /\ notAboutToFail c'' = false. Proof. induct i; simplify. invert H. eauto 10. excluded_middle (exists c0 h' l' c', nextAction c1 c0 /\ commutes c0 s /\ step (h, l, c1) (h', l', c')). first_order. eapply translate_trace_commute in H; eauto. first_order. eapply IHi in H6; eauto. first_order. eauto 7. invert H. invert H5. eapply IHi in H6; eauto. first_order. eauto 7. eapply IHi in H6; eauto using summarize_step. first_order. do 3 eexists; propositional. 2: apply H4. eauto 10. Qed. Require Import Classical. (* This theorem brings it all together, to reduce one invariant-proof problem to * another that uses the optimized semantics. *) Theorem step_stepC : forall h l c1 c2 s n, summarize c2 s -> boundRunningTime (c1 || c2) n -> invariantFor (trsys_ofC s h l (c1 || c2)) (fun p => let '(_, _, c) := p in notAboutToFail c = true) -> invariantFor (trsys_of h l (c1 || c2)) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. simplify. apply NNPP; propositional. unfold invariantFor in H2. apply not_all_ex_not in H2; first_order. apply imply_to_and in H2; propositional. apply not_all_ex_not in H4; first_order. apply imply_to_and in H2; propositional. cases x0. cases p. subst. simplify. cases (notAboutToFail c); propositional. assert (exists n', boundRunningTime c n' /\ n' <= n) by eauto using boundRunningTime_steps. first_order. eapply complete_trace in H2; eauto. first_order. specialize (trc_trans H4 H2); simplify. assert (notAboutToFail x2 = false) by eauto using notAboutToFail_steps. unfold invariantFor in H1; simplify. apply steps_stepsi in H7; first_order. eapply translate_trace in H7; eauto. first_order. apply H1 in H7; auto. equality. Qed. (* Now we define some tactics to help us apply this technique automatically for * concrete programs. As usual, we won't explain how the tactics work. *) Ltac analyzer := repeat (match goal with | [ |- context[if ?E then _ else _] ] => cases E | _ => econstructor end; simplify; try solve [ sets ]). Ltac por_solve := simplify; repeat econstructor; sets; linear_arithmetic. Ltac por_invert1 := match goal with | [ H : forall (c0 : cmd) (h'' : heap) (l'' : locks) (c'' : cmd), _ -> _ |- _ ] => (exfalso; eapply H; try por_solve; por_solve) || clear H | _ => model_check_invert1 end. Ltac por_invert := simplify; repeat por_invert1. Ltac por_closure := repeat (apply oneStepClosure_empty || (apply oneStepClosure_split; [ por_invert; try equality; solve [ singletoner ] | ])). Ltac por_step := eapply MscStep; [ por_closure | simplify ]. Ltac por_done := apply MscDone. (* OK, ready to return to our last example! This time we will see state-space * exploration that steps a single thread at a time, where the final invariant * includes no states with multiple *partially executed* threads. *) Theorem independent_threads_ok_again : invariantFor (trsys_of $0 {} independent_threads) (fun p => let '(_, _, c) := p in notAboutToFail c = true). Proof. (* We need to supply the summary when invoking the proof principle, though we * could also have used Ltac to compute it automatically. *) eapply step_stepC with (s := {| Reads := {2}; Writes := {2}; Locks := {} |}). analyzer. analyzer. eapply invariant_weaken. apply multiStepClosure_ok; simplify. por_step. por_step. por_step. por_step. por_step. por_step. por_step. por_step. por_step. por_step. por_done. sets. (* We computed an inexact running time. By filling in zeroes for some * existential variables, we commit to a concrete bound. *) Unshelve. exact 0. exact 0. exact 0. exact 0. exact 0. exact 0. Qed.