SharedMemory: comments

This commit is contained in:
Adam Chlipala 2016-04-24 15:29:21 -04:00
parent 8d250037e7
commit 9f938e6ac1

View file

@ -12,20 +12,21 @@ Set Asymmetric Patterns.
Notation "m $! k" := (match m $? k with Some n => n | None => O end) (at level 30).
Definition heap := fmap nat nat.
Definition assertion := heap -> Prop.
Hint Extern 1 (_ <= _) => linear_arithmetic.
Hint Extern 1 (@eq nat _ _) => linear_arithmetic.
Ltac simp := repeat (simplify; subst; propositional;
try match goal with
| [ H : ex _ |- _ ] => invert H
end); try linear_arithmetic.
(** * An object language with shared-memory concurrency *)
(* Let's simplify the encoding by only working with commands that generate
(* We're going to start investigating how to verify concurrent programs whose
* behavior is given with operational semantics. There are a variety of
* different concurrency styles out there, with their distinctive practical and
* theoretical benefits; we'll start with the most venerable style, shared
* memory. *)
(* We'll build on the mixed-embedding languages from the last two chapter.
* Let's simplify the encoding by only working with commands that generate
* [nat]. *)
Inductive loop_outcome :=
| Done (a : nat)
@ -48,8 +49,13 @@ Inductive cmd :=
Notation "x <- c1 ; c2" := (Bind c1 (fun x => c2)) (right associativity, at level 80).
Infix "||" := Par.
(* As the program runs, it has not just a heap but also a set of locks that are
* taken at that moment. *)
Definition locks := set nat.
(* The first few rules below are basically the same as in last chapter, except
* that we relax the restriction on only reading/writing addresses that are
* explicitly mapped into the heap. *)
Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
| StepBindRecur : forall c1 c1' c2 h h' l l',
step (h, l, c1) (h', l', c1')
@ -62,6 +68,9 @@ Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
| StepWrite : forall h l a v,
step (h, l, Write a v) (h $+ (a, v), l, Return 0)
(* First interesting twist: we can "push steps through" the [Par] operator on
* either side. The choice of a side is the sole source of nondeterminism in
* this semantics, corresponding to the whims of a scheduler. *)
| StepParRecur1 : forall h l c1 c2 h' l' c1',
step (h, l, c1) (h', l', c1')
-> step (h, l, Par c1 c2) (h', l', Par c1' c2)
@ -69,6 +78,7 @@ Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
step (h, l, c2) (h', l', c2')
-> step (h, l, Par c1 c2) (h', l', Par c1 c2')
(* To take a lock, it must not be held; and vice versa for releasing a lock. *)
| StepLock : forall h l a,
~a \in l
-> step (h, l, Lock a) (h, l \cup {a}, Return 0)
@ -82,6 +92,30 @@ Definition trsys_of (h : heap) (l : locks) (c : cmd) := {|
|}.
(** * An example *)
(* In this lecture, we'll focus on model checking as our program-proof
* technique. Recall that model checking is all about reducing a problem to a
* reachability question in a finite-state system. Our programs here have the
* (perhaps surprising!) property that termination is guaranteed, for any
* initial state, regardless of how the scheduler behaves. Therefore, all
* programs of this language are finite-state and thus, in principle, amenable
* to model checking! (We were careful to leave out looping constructs.)
* Let's define a simple two-thread program and model-check it. *)
(* Throughout this file, we'll only be verifying that no thread could ever reach
* a [Fail] command that is next in line to execute, a property that is easy to
* phrase as an invariant of the transition system. Here's how to compute
* whether a command is about to fail. *)
Fixpoint notAboutToFail (c : cmd) : bool :=
match c with
| Fail => false
| Bind c1 _ => notAboutToFail c1
| Par c1 c2 => notAboutToFail c1 && notAboutToFail c2
| _ => true
end.
Example two_increments_thread :=
_ <- Lock 0;
n <- Read 0;
@ -93,21 +127,14 @@ Example two_increments_thread :=
Example two_increments := two_increments_thread || two_increments_thread.
Fixpoint notAboutToFail (c : cmd) : bool :=
match c with
| Fail => false
| Bind c1 _ => notAboutToFail c1
| Par c1 c2 => notAboutToFail c1 && notAboutToFail c2
| _ => true
end.
(* Next, we do one of our standard boring (and slow; sorry!) model-checking
* proofs, where tactics explore the finite state space for us. *)
Theorem two_increments_ok :
invariantFor (trsys_of $0 {} two_increments)
(fun p => let '(_, _, c) := p in
notAboutToFail c = true).
Proof.
Admitted.
(* unfold two_increments, two_increments_thread.
unfold two_increments, two_increments_thread.
simplify.
eapply invariant_weaken.
apply multiStepClosure_ok; simplify.
@ -130,11 +157,24 @@ Admitted.
simplify.
propositional; subst; equality.
Qed.*)
Qed.
(* Notice how every step of the process needs to consider all possibilities of
* threads that could run next, which, in general, gives us state spaces of size
* *exponential* in the program-text length. That's really a shame from a
* performance perspective, isn't it? Our goal now will be to apply
* *optimizations* that show equivalence with alternative transition systems
* whose state spaces are smaller. By the end, we'll be able to check
* nontrivial concurrent programs by only computing state spaces with *linear*
* size in program-text length! (The catch is that the technique only applies
* for programs accepted by a simple static analysis.) *)
(** * Optimization #1: always run all purely local actions first. *)
(* Here's a function that, in a single go, performs all simplifications that are
* *thread-local*. That is, no other thread can observe those steps, as they
* don't touch the heap or lockset. *)
Fixpoint runLocal (c : cmd) : cmd :=
match c with
| Return _ => c
@ -151,6 +191,10 @@ Fixpoint runLocal (c : cmd) : cmd :=
| Unlock _ => c
end.
(* We can define an alternative step relation that always runs [runLocal] as a
* kind of postprocessing on the new command. This way, the model checker won't
* need to run separate exploration steps for all of those trivial
* simplifications. *)
Inductive stepL : heap * locks * cmd -> heap * locks * cmd -> Prop :=
| StepL : forall h l c h' l' c',
step (h, l, c) (h', l', c')
@ -161,6 +205,8 @@ Definition trsys_ofL (h : heap) (l : locks) (c : cmd) := {|
Step := stepL
|}.
(* Now we prove some basic facts; commentary resumes before [step_runLocal]. *)
Hint Constructors step stepL.
Lemma run_Return : forall h l r h' l' c,
@ -234,6 +280,9 @@ Proof.
equality.
Qed.
(* The key correctnss property: when an original step takes place, either it
* has no effect or can be duplicated when we apply [runLocal] both *before* and
* *after* the step. *)
Lemma step_runLocal : forall h l c h' l' c',
step (h, l, c) (h', l', c')
-> (runLocal c = runLocal c' /\ h = h' /\ l = l')
@ -267,6 +316,8 @@ Proof.
rewrite runLocal_idem; equality.
Qed.
(* That was the main punchline. Commentary resumes at [step_stepL]. *)
Lemma step_stepL' : forall h l c h' l' c',
step^* (h, l, c) (h', l', c')
-> stepL^* (h, l, runLocal c) (h', l', runLocal c').
@ -296,6 +347,9 @@ Proof.
end; try equality.
Qed.
(* The key proof principle: to verify a can-never-fail invariant for the
* original semantics, it suffices to verify it for the new semantics
* instead. *)
Theorem step_stepL : forall h l c ,
invariantFor (trsys_ofL h l c) (fun p => let '(_, _, c) := p in
notAboutToFail c = true)
@ -312,13 +366,14 @@ Proof.
apply H in H1; eauto using notAboutToFail_runLocal.
Qed.
(* Now watch as we verify that last example in fewer steps, with a smaller
* invariant! *)
Theorem two_increments_ok_again :
invariantFor (trsys_of $0 {} two_increments)
(fun p => let '(_, _, c) := p in
notAboutToFail c = true).
Proof.
Admitted.
(* apply step_stepL.
apply step_stepL.
unfold two_increments, two_increments_thread.
simplify.
eapply invariant_weaken.
@ -336,11 +391,23 @@ Admitted.
simplify.
propositional; subst; equality.
Qed.*)
Qed.
(** * Optimization #2: partial-order reduction *)
(* There was a key property lurking behind the soundness proof of our last
* optimization: *commutativity*, one of the most common ways to tame the
* state-space explosion from concurrency scheduling. Specifically, the local
* steps performed by [runLocal] all *commute* with any steps taken in other
* threads, because they are agnostic to shared state. Can we generalize the
* technique to also harness commutativity of operations that *do* depend on the
* shared state, but in particular controlled ways? Why, yes we can! The most
* popular such technique from the model-checking world is
* *partial order reduction*. *)
(* First, here's an example where we should be able to do better than allowing
* either thread to run in every step, as we model-check. *)
Example independent_threads :=
(a <- Read 0;
_ <- Write 1 (a + 1);
@ -352,13 +419,14 @@ Example independent_threads :=
|| (b <- Read 2;
Write 2 (b + 1)).
(* Unfortunately, our existing model-checker does in fact follow the
* "exponential" strategy to build the state space. *)
Theorem independent_threads_ok :
invariantFor (trsys_of $0 {} independent_threads)
(fun p => let '(_, _, c) := p in
notAboutToFail c = true).
Proof.
Admitted.
(* apply step_stepL.
apply step_stepL.
unfold independent_threads.
simplify.
eapply invariant_weaken.
@ -373,14 +441,26 @@ Admitted.
simplify.
propositional; subst; equality.
Qed.*)
Qed.
(* It turns out that we can actually do model-checking where at each point we
* only explore the result of running *the first thread that is ready*! Such a
* strategy isn't sound for all programs, but it is for our example here. Why?
* Every pair of atomic actions between threads *commutes*. That is, neither
* one affects whether the other is enabled to execute (the way that one [Lock]
* can disable another), and running the two actions in either order modifies
* shared state identically. In such a case, we may always pick our favorite
* thread to step next. *)
(* To make all that formal, we will do some static program analyze to summarize
* which atomic actions a thread might take. *)
Record summary := {
Reads : set nat;
Writes : set nat;
Locks : set nat
}.
(* Here is a relation to check the accuracy of a summary for a single thread. *)
Inductive summarize : cmd -> summary -> Prop :=
| SumReturn : forall r s,
summarize (Return r) s
@ -403,6 +483,8 @@ Inductive summarize : cmd -> summary -> Prop :=
a \in s.(Locks)
-> summarize (Unlock a) s.
(* And here's one to check the accuracy of a summary for a list of threads.
* Each thread is packaged with its verified summary in the list. *)
Inductive summarizeThreads : cmd -> list (cmd * summary) -> Prop :=
| StPar : forall c1 c2 ss1 ss2,
summarizeThreads c1 ss1
@ -411,7 +493,10 @@ Inductive summarizeThreads : cmd -> list (cmd * summary) -> Prop :=
| StAtomic : forall c s,
summarize c s
-> summarizeThreads c [(c, s)].
(* We will use these expanded lists as the command type in the new semantics. *)
(* To check commutativity, it is helpful to know which atomic command a thread
* could run next. *)
Inductive nextAction : cmd -> cmd -> Prop :=
| NaReturn : forall r,
nextAction (Return r) (Return r)
@ -429,6 +514,10 @@ Inductive nextAction : cmd -> cmd -> Prop :=
nextAction c1 c
-> nextAction (Bind c1 c2) c.
(* We can succinctly capture which summaries describe threads that will commute
* with a particular atomic action. The guarantee applies not just to the
* thread's first action but also to all others that it might reach later in
* execution. *)
Definition commutes (c : cmd) (s : summary) : Prop :=
match c with
| Return _ => True
@ -440,24 +529,48 @@ Definition commutes (c : cmd) (s : summary) : Prop :=
| _ => False
end.
(* Now the new semantics: *)
Inductive stepC : heap * locks * list (cmd * summary) -> heap * locks * list (cmd * summary) -> Prop :=
(* It is always OK to let the first thread run. *)
| StepFirst : forall h l c h' l' c' s cs,
step (h, l, c) (h', l', c')
-> stepC (h, l, (c, s) :: cs) (h', l', (c', s) :: cs)
(* However, you may only pick another thread to run if it would be unsound to
* consider just the first thread. The negation of the soundness condition is
* expressed in the first premise below. *)
| StepAny : forall h l c h' l' s cs1 c1 s1 cs2 c1',
(forall c0 h'' l'' c'', nextAction c c0
(* The first thread [c] has some atomic action [c0]
* ready to run. *)
-> List.Forall (fun c_s => commutes c0 (snd c_s)) (cs1 ++ (c1, s1) :: cs2)
(* All other threads only contain actiosn that commute
* with [c0]. *)
-> step (h, l, c) (h'', l'', c'')
(* Finaly, [c] is actually enabled to run, which might
* not be the case if [c0] is a locking command. *)
-> False)
(* If we passed that check, then we can step a single thread as expected! *)
-> step (h, l, c1) (h', l', c1')
-> stepC (h, l, (c, s) :: cs1 ++ (c1, s1) :: cs2) (h', l', (c, s) :: cs1 ++ (c1', s1) :: cs2).
(* Notice how this definition turns the partial-order-reduction optimization
* "off and on" during state-space exploration. We only restrict our attention
* to the first thread so long as the soundness condition above is true. *)
Definition trsys_ofC (h : heap) (l : locks) (cs : list (cmd * summary)) := {|
Initial := {(h, l, cs)};
Step := stepC
|}.
(* Now we come to quite a few fairly complex lemmas.
* First, [commutes] really does allow other commands to swap order with the
* atomic action in question. *)
Lemma commutes_sound' : forall h l c2 h' l' c2',
step (h, l, c2) (h', l', c2')
-> forall s c1 h'' l'' c1', step (h', l', c1) (h'', l'', c1')
@ -519,6 +632,8 @@ Proof.
sets.
Qed.
(* Commentary now resumes at [commutes_sound]. *)
Lemma step_nextAction_Return : forall r h l c h' l' c',
step (h, l, c) (h', l', c')
-> nextAction c (Return r)
@ -568,6 +683,8 @@ Proof.
induct 1; auto.
Qed.
(* [commutes] allows order-swapping even when the atomic action is embedded
* further within the structure of a larger command. *)
Lemma commutes_sound : forall h l c2 h' l' c2',
step (h, l, c2) (h', l', c2')
-> forall s c1 c0 h'' l'' c1', step (h', l', c1) (h'', l'', c1')
@ -609,6 +726,9 @@ Qed.
Hint Constructors summarize.
(* The next two lemmas show that, once a summary is accurate for a command, it
* remains accurate throughout the whole execution lifetime of the command. *)
Lemma summarize_step : forall h l c h' l' c' s,
step (h, l, c) (h', l', c')
-> summarize c s
@ -628,6 +748,11 @@ Proof.
eauto using summarize_step.
Qed.
(* The next technical device will require that we bound how many steps of
* execution particular commands could run for. We use a conservative
* overapproximation that is easy to compute, phrased as a relation.
* Yes, it is time to get scared, as we must define exponentiation to compute
* large enough time bounds! *)
Fixpoint pow2 (n : nat) : nat :=
match n with
| O => 1
@ -656,6 +781,8 @@ Inductive boundRunningTime : cmd -> nat -> Prop :=
-> boundRunningTime c2 n2
-> boundRunningTime (Par c1 c2) (pow2 (n1 + n2)).
(* Perhaps surprisingly, there exist commands that have no finite time bounds!
* Mixed-embedding languages often have these counterintuitive properties. *)
Theorem boundRunningTime_not_total : exists c, forall n, ~boundRunningTime c n.
Proof.
Fixpoint scribbly (n : nat) : cmd :=
@ -682,6 +809,8 @@ Proof.
linear_arithmetic.
Qed.
(* Next, some boring properties of [pow2]. *)
Lemma pow2_pos : forall n,
pow2 n > 0.
Proof.
@ -737,6 +866,7 @@ Qed.
Hint Constructors boundRunningTime.
(* Key property: taking a step of execution lowers the running-time bound. *)
Lemma boundRunningTime_step : forall c n h l h' l',
boundRunningTime c n
-> forall c', step (h, l, c) (h', l', c')
@ -756,8 +886,34 @@ Proof.
eauto 6.
Qed.
Lemma boundRunningTime_steps : forall h l c h' l' c',
step^* (h, l, c) (h', l', c')
-> forall n, boundRunningTime c n
-> exists n', boundRunningTime c' n' /\ n' <= n.
Proof.
induct 1; simplify; eauto.
cases y.
cases p.
specialize (boundRunningTime_step H1 H); first_order.
eapply IHtrc in H2; eauto.
first_order.
eauto.
Qed.
(* Here we get a bit naughty and begin to depend on *classical logic*, as with
* the *law of the excluded middle*: [forall P, P \/ ~P]. You may not have
* noticed that we've never applied that principle explicitly so far! *)
Require Import Classical.
(* A very useful property: when a command has bounded running time, any
* execution starting from that command can be *completed* to one ending in a
* stuck state. This property definitely wouldn't be true without the bound,
* if our language had explicit, unbounded loops.
*
* The fun thing about this proof is that we are essentially using tactics to
* define an interpreter for the object language, making arbitrary scheduling
* choices. Implicit in the derivation is a proof that this interpreter always
* terminates, which we get by strong induction on the running-time bound. *)
Theorem complete_trace : forall k c n,
boundRunningTime c n
-> n <= k
@ -898,6 +1054,9 @@ Proof.
eauto.
Qed.
(* We will apply completion to traces that end in violation of the
* not-about-to-fail invariant. It is important that any extension of such a
* trace preserves that property. *)
Lemma notAboutToFail_step : forall h l c h' l' c',
step (h, l, c) (h', l', c')
-> notAboutToFail c = false
@ -925,20 +1084,8 @@ Proof.
eauto using notAboutToFail_step.
Qed.
Lemma boundRunningTime_steps : forall h l c h' l' c',
step^* (h, l, c) (h', l', c')
-> forall n, boundRunningTime c n
-> exists n', boundRunningTime c' n' /\ n' <= n.
Proof.
induct 1; simplify; eauto.
cases y.
cases p.
specialize (boundRunningTime_step H1 H); first_order.
eapply IHtrc in H2; eauto.
first_order.
eauto.
Qed.
(* One last technical device: we define a variant of [step^*] that tracks how
* many steps were made, which will come in handy for induction shortly. *)
Inductive stepsi : nat -> heap * locks * cmd -> heap * locks * cmd -> Prop :=
| StepsiO : forall st,
stepsi O st st
@ -956,6 +1103,8 @@ Proof.
induct 1; first_order; eauto.
Qed.
(* Some helper lemmas about Coq's quantification over lists *)
Lemma Exists_app_fwd : forall A (P : A -> Prop) ls1 ls2,
Exists P (ls1 ++ ls2)
-> Exists P ls1 \/ Exists P ls2.
@ -973,6 +1122,42 @@ Proof.
invert H0; eauto.
Qed.
Lemma Forall_app_fwd1 : forall A (P : A -> Prop) ls1 ls2,
Forall P (ls1 ++ ls2)
-> Forall P ls1.
Proof.
induct ls1; invert 1; eauto.
Qed.
Lemma Forall_app_fwd2 : forall A (P : A -> Prop) ls1 ls2,
Forall P (ls1 ++ ls2)
-> Forall P ls2.
Proof.
induct ls1; invert 1; simplify; subst; eauto.
Qed.
Hint Immediate Forall_app_fwd1 Forall_app_fwd2.
Lemma Forall_app_bwd : forall A (P : A -> Prop) ls1 ls2,
Forall P ls1
-> Forall P ls2
-> Forall P (ls1 ++ ls2).
Proof.
induct 1; simplify; eauto.
Qed.
Hint Resolve Forall_app_bwd.
Lemma Forall2 : forall A (P Q R : A -> Prop) ls,
Forall P ls
-> Forall Q ls
-> (forall x, P x -> Q x -> R x)
-> Forall R ls.
Proof.
induct 1; invert 1; eauto.
Qed.
(* A connection between [notAboutToFail] in the old and new worlds *)
Lemma summarizeThreads_aboutToFail : forall c cs,
summarizeThreads c cs
-> notAboutToFail c = false
@ -998,6 +1183,8 @@ Hint Immediate summarizeThreads_nonempty.
Hint Constructors stepC summarizeThreads.
(* When we step a summarized thread, we can duplicate the step within one of the
* elements of the summary. *)
Lemma step_pick : forall h l c h' l' c',
step (h, l, c) (h', l', c')
-> forall cs, summarizeThreads c cs
@ -1047,7 +1234,10 @@ Proof.
exact l'.
exact h'.
Qed.
(* The next few lemmas are quite technical. Commentary resumes for
* [translate_trace]. *)
Lemma translate_trace_matching : forall h l c h' l' c',
step (h, l, c) (h', l', c')
-> forall c0 s cs, summarizeThreads c ((c0, s) :: cs)
@ -1135,22 +1325,6 @@ Proof.
linear_arithmetic.
Qed.
Lemma Forall_app_fwd1 : forall A (P : A -> Prop) ls1 ls2,
Forall P (ls1 ++ ls2)
-> Forall P ls1.
Proof.
induct ls1; invert 1; eauto.
Qed.
Lemma Forall_app_fwd2 : forall A (P : A -> Prop) ls1 ls2,
Forall P (ls1 ++ ls2)
-> Forall P ls2.
Proof.
induct ls1; invert 1; simplify; subst; eauto.
Qed.
Hint Immediate Forall_app_fwd1 Forall_app_fwd2.
Lemma commute_writes : forall c1 c a s h l1' h' l' v,
nextAction c1 c
-> a \in Writes s
@ -1314,16 +1488,6 @@ Proof.
eauto.
Qed.
Lemma Forall_app_bwd : forall A (P : A -> Prop) ls1 ls2,
Forall P ls1
-> Forall P ls2
-> Forall P (ls1 ++ ls2).
Proof.
induct 1; simplify; eauto.
Qed.
Hint Resolve Forall_app_bwd.
Lemma split_app : forall A (l1 l2 r1 r2 : list A),
l1 ++ l2 = r1 ++ r2
-> (exists r12, r1 = l1 ++ r12
@ -1444,15 +1608,12 @@ Proof.
induct 1; eauto.
Qed.
Lemma Forall2 : forall A (P Q R : A -> Prop) ls,
Forall P ls
-> Forall Q ls
-> (forall x, P x -> Q x -> R x)
-> Forall R ls.
Proof.
induct 1; invert 1; eauto.
Qed.
(* The heart of the soundness proof! When a length-[i] derivation gets us to a
* stuck state that is about to fail, and when we have summarized the program,
* we can run that summary in the optimized semantics and also arrive at a state
* that is about to fail. Thus, if we explore the optimized state space and
* find no failures, we can conclude lack of reachable failures in the original
* state space. *)
Lemma translate_trace : forall i h l c h' l' c',
stepsi i (h, l, c) (h', l', c')
-> (forall h'' l'' c'', step (h', l', c') (h'', l'', c'') -> False)
@ -1505,6 +1666,8 @@ Proof.
induct 1; invert 1; equality.
Qed.
(* This theorem brings it all together, to reduce one invariant-proof problem to
* another that uses the optimized semantics. *)
Theorem step_stepC : forall h l c (cs : list (cmd * summary)) n,
summarizeThreads c cs
-> boundRunningTime c n
@ -1542,6 +1705,9 @@ Proof.
assumption.
Qed.
(* Now we define some tactics to help us apply this technique automatically for
* concrete programs. As usual, we won't explain how the tactics work. *)
Ltac analyzer := repeat (match goal with
| [ |- context[if ?E then _ else _] ] => cases E
| _ => econstructor
@ -1574,11 +1740,16 @@ Ltac por_step :=
Ltac por_done :=
apply MscDone; eapply oneStepClosure_solve; [ por_closure | simplify; solve [ sets ] ].
(* OK, ready to return to our last example! This time we will see state-space
* exploration that steps a single thread at a time, where the final invariant
* includes no states with multiple *partially executed* threads. *)
Theorem independent_threads_ok_again :
invariantFor (trsys_of $0 {} independent_threads)
(fun p => let '(_, _, c) := p in
notAboutToFail c = true).
Proof.
(* We need to supply that summary when invoking the proof principle, though we
* could also have used Ltac to compute it automatically. *)
eapply step_stepC with (cs := [(_, {| Reads := {0, 1};
Writes := {1};
Locks := {} |})]
@ -1606,6 +1777,8 @@ Proof.
sets.
(* We computed an inexact running time. By filling in zeroes for some
* existential variables, we commit to a concrete bound. *)
Grab Existential Variables.
exact 0.
exact 0.