diff --git a/SharedMemory.v b/SharedMemory.v
index 2c87f2c..4a88279 100644
--- a/SharedMemory.v
+++ b/SharedMemory.v
@@ -12,20 +12,21 @@ Set Asymmetric Patterns.
 
 Notation "m $! k" := (match m $? k with Some n => n | None => O end) (at level 30).
 Definition heap := fmap nat nat.
-Definition assertion := heap -> Prop.
 
 Hint Extern 1 (_ <= _) => linear_arithmetic.
 Hint Extern 1 (@eq nat _ _) => linear_arithmetic.
 
-Ltac simp := repeat (simplify; subst; propositional;
-                     try match goal with
-                         | [ H : ex _ |- _ ] => invert H
-                         end); try linear_arithmetic.
-
 
 (** * An object language with shared-memory concurrency *)
 
-(* Let's simplify the encoding by only working with commands that generate
+(* We're going to start investigating how to verify concurrent programs whose
+ * behavior is given with operational semantics.  There are a variety of
+ * different concurrency styles out there, with their distinctive practical and
+ * theoretical benefits; we'll start with the most venerable style, shared
+ * memory. *)
+
+(* We'll build on the mixed-embedding languages from the last two chapter. 
+ * Let's simplify the encoding by only working with commands that generate
  * [nat]. *)
 Inductive loop_outcome :=
 | Done (a : nat)
@@ -48,8 +49,13 @@ Inductive cmd :=
 Notation "x <- c1 ; c2" := (Bind c1 (fun x => c2)) (right associativity, at level 80).
 Infix "||" := Par.
 
+(* As the program runs, it has not just a heap but also a set of locks that are
+ * taken at that moment. *)
 Definition locks := set nat.
 
+(* The first few rules below are basically the same as in last chapter, except
+ * that we relax the restriction on only reading/writing addresses that are
+ * explicitly mapped into the heap. *)
 Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
 | StepBindRecur : forall c1 c1' c2 h h' l l',
   step (h, l, c1) (h', l', c1')
@@ -62,6 +68,9 @@ Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
 | StepWrite : forall h l a v,
   step (h, l, Write a v) (h $+ (a, v), l, Return 0)
 
+(* First interesting twist: we can "push steps through" the [Par] operator on
+ * either side.  The choice of a side is the sole source of nondeterminism in
+ * this semantics, corresponding to the whims of a scheduler. *)
 | StepParRecur1 : forall h l c1 c2 h' l' c1',
   step (h, l, c1) (h', l', c1')
   -> step (h, l, Par c1 c2) (h', l', Par c1' c2)
@@ -69,6 +78,7 @@ Inductive step : heap * locks * cmd -> heap * locks * cmd -> Prop :=
   step (h, l, c2) (h', l', c2')
   -> step (h, l, Par c1 c2) (h', l', Par c1 c2')
 
+(* To take a lock, it must not be held; and vice versa for releasing a lock. *)
 | StepLock : forall h l a,
   ~a \in l
   -> step (h, l, Lock a) (h, l \cup {a}, Return 0)
@@ -82,6 +92,30 @@ Definition trsys_of (h : heap) (l : locks) (c : cmd) := {|
 |}.
 
 
+
+(** * An example *)
+
+(* In this lecture, we'll focus on model checking as our program-proof
+ * technique.  Recall that model checking is all about reducing a problem to a
+ * reachability question in a finite-state system.  Our programs here have the
+ * (perhaps surprising!) property that termination is guaranteed, for any
+ * initial state, regardless of how the scheduler behaves.  Therefore, all
+ * programs of this language are finite-state and thus, in principle, amenable
+ * to model checking!  (We were careful to leave out looping constructs.)
+ * Let's define a simple two-thread program and model-check it. *)
+
+(* Throughout this file, we'll only be verifying that no thread could ever reach
+ * a [Fail] command that is next in line to execute, a property that is easy to
+ * phrase as an invariant of the transition system.  Here's how to compute
+ * whether a command is about to fail. *)
+Fixpoint notAboutToFail (c : cmd) : bool :=
+  match c with
+  | Fail => false
+  | Bind c1 _ => notAboutToFail c1
+  | Par c1 c2 => notAboutToFail c1 && notAboutToFail c2
+  | _ => true
+  end.
+
 Example two_increments_thread :=
   _ <- Lock 0;
   n <- Read 0;
@@ -93,21 +127,14 @@ Example two_increments_thread :=
 
 Example two_increments := two_increments_thread || two_increments_thread.
 
-Fixpoint notAboutToFail (c : cmd) : bool :=
-  match c with
-  | Fail => false
-  | Bind c1 _ => notAboutToFail c1
-  | Par c1 c2 => notAboutToFail c1 && notAboutToFail c2
-  | _ => true
-  end.
-
+(* Next, we do one of our standard boring (and slow; sorry!) model-checking
+ * proofs, where tactics explore the finite state space for us. *)
 Theorem two_increments_ok :
   invariantFor (trsys_of $0 {} two_increments)
                (fun p => let '(_, _, c) := p in
                          notAboutToFail c = true).
 Proof.
-Admitted.
-(*  unfold two_increments, two_increments_thread.
+  unfold two_increments, two_increments_thread.
   simplify.
   eapply invariant_weaken.
   apply multiStepClosure_ok; simplify.
@@ -130,11 +157,24 @@ Admitted.
 
   simplify.
   propositional; subst; equality.
-Qed.*)
+Qed.
+
+(* Notice how every step of the process needs to consider all possibilities of
+ * threads that could run next, which, in general, gives us state spaces of size
+ * *exponential* in the program-text length.  That's really a shame from a
+ * performance perspective, isn't it?  Our goal now will be to apply
+ * *optimizations* that show equivalence with alternative transition systems
+ * whose state spaces are smaller.  By the end, we'll be able to check
+ * nontrivial concurrent programs by only computing state spaces with *linear*
+ * size in program-text length!  (The catch is that the technique only applies
+ * for programs accepted by a simple static analysis.) *)
 
 
 (** * Optimization #1: always run all purely local actions first. *)
 
+(* Here's a function that, in a single go, performs all simplifications that are
+ * *thread-local*.  That is, no other thread can observe those steps, as they
+ * don't touch the heap or lockset. *)
 Fixpoint runLocal (c : cmd) : cmd :=
   match c with
   | Return _ => c
@@ -151,6 +191,10 @@ Fixpoint runLocal (c : cmd) : cmd :=
   | Unlock _ => c
   end.
 
+(* We can define an alternative step relation that always runs [runLocal] as a
+ * kind of postprocessing on the new command.  This way, the model checker won't
+ * need to run separate exploration steps for all of those trivial
+ * simplifications. *)
 Inductive stepL : heap * locks * cmd -> heap * locks * cmd -> Prop :=
 | StepL : forall h l c h' l' c',
   step (h, l, c) (h', l', c')
@@ -161,6 +205,8 @@ Definition trsys_ofL (h : heap) (l : locks) (c : cmd) := {|
   Step := stepL
 |}.
 
+(* Now we prove some basic facts; commentary resumes before [step_runLocal]. *)
+
 Hint Constructors step stepL.
 
 Lemma run_Return : forall h l r h' l' c,
@@ -234,6 +280,9 @@ Proof.
   equality.
 Qed.
 
+(* The key correctnss property: when an original step takes place, either it
+ * has no effect or can be duplicated when we apply [runLocal] both *before* and
+ * *after* the step. *)
 Lemma step_runLocal : forall h l c h' l' c',
   step (h, l, c) (h', l', c')
   -> (runLocal c = runLocal c' /\ h = h' /\ l = l')
@@ -267,6 +316,8 @@ Proof.
   rewrite runLocal_idem; equality.
 Qed.
 
+(* That was the main punchline.  Commentary resumes at [step_stepL]. *)
+
 Lemma step_stepL' : forall h l c h' l' c',
   step^* (h, l, c) (h', l', c')
   -> stepL^* (h, l, runLocal c) (h', l', runLocal c').
@@ -296,6 +347,9 @@ Proof.
          end; try equality.
 Qed.
 
+(* The key proof principle: to verify a can-never-fail invariant for the
+ * original semantics, it suffices to verify it for the new semantics
+ * instead. *)
 Theorem step_stepL : forall h l c ,
   invariantFor (trsys_ofL h l c) (fun p => let '(_, _, c) := p in
                                            notAboutToFail c = true)
@@ -312,13 +366,14 @@ Proof.
   apply H in H1; eauto using notAboutToFail_runLocal.
 Qed.
 
+(* Now watch as we verify that last example in fewer steps, with a smaller
+ * invariant! *)
 Theorem two_increments_ok_again :
   invariantFor (trsys_of $0 {} two_increments)
                (fun p => let '(_, _, c) := p in
                          notAboutToFail c = true).
 Proof.
-Admitted.
-(*  apply step_stepL.
+  apply step_stepL.
   unfold two_increments, two_increments_thread.
   simplify.
   eapply invariant_weaken.
@@ -336,11 +391,23 @@ Admitted.
 
   simplify.
   propositional; subst; equality.
-Qed.*)
+Qed.
 
 
 (** * Optimization #2: partial-order reduction *)
 
+(* There was a key property lurking behind the soundness proof of our last
+ * optimization: *commutativity*, one of the most common ways to tame the
+ * state-space explosion from concurrency scheduling.  Specifically, the local
+ * steps performed by [runLocal] all *commute* with any steps taken in other
+ * threads, because they are agnostic to shared state.  Can we generalize the
+ * technique to also harness commutativity of operations that *do* depend on the
+ * shared state, but in particular controlled ways?  Why, yes we can!  The most
+ * popular such technique from the model-checking world is
+ * *partial order reduction*. *)
+
+(* First, here's an example where we should be able to do better than allowing
+ * either thread to run in every step, as we model-check. *)
 Example independent_threads :=
   (a <- Read 0;
    _ <- Write 1 (a + 1);
@@ -352,13 +419,14 @@ Example independent_threads :=
   || (b <- Read 2;
        Write 2 (b + 1)).
 
+(* Unfortunately, our existing model-checker does in fact follow the
+ * "exponential" strategy to build the state space. *)
 Theorem independent_threads_ok :
   invariantFor (trsys_of $0 {} independent_threads)
                (fun p => let '(_, _, c) := p in
                          notAboutToFail c = true).
 Proof.
-Admitted.
-(*  apply step_stepL.
+  apply step_stepL.
   unfold independent_threads.
   simplify.
   eapply invariant_weaken.
@@ -373,14 +441,26 @@ Admitted.
 
   simplify.
   propositional; subst; equality.
-Qed.*)
+Qed.
 
+(* It turns out that we can actually do model-checking where at each point we
+ * only explore the result of running *the first thread that is ready*!  Such a
+ * strategy isn't sound for all programs, but it is for our example here.  Why?
+ * Every pair of atomic actions between threads *commutes*.  That is, neither
+ * one affects whether the other is enabled to execute (the way that one [Lock]
+ * can disable another), and running the two actions in either order modifies
+ * shared state identically.  In such a case, we may always pick our favorite
+ * thread to step next. *)
+
+(* To make all that formal, we will do some static program analyze to summarize
+ * which atomic actions a thread might take. *)
 Record summary := {
   Reads : set nat;
   Writes : set nat;
   Locks : set nat
 }.
 
+(* Here is a relation to check the accuracy of a summary for a single thread. *)
 Inductive summarize : cmd -> summary -> Prop :=
 | SumReturn : forall r s,
     summarize (Return r) s
@@ -403,6 +483,8 @@ Inductive summarize : cmd -> summary -> Prop :=
     a \in s.(Locks)
     -> summarize (Unlock a) s.
 
+(* And here's one to check the accuracy of a summary for a list of threads.
+ * Each thread is packaged with its verified summary in the list. *)
 Inductive summarizeThreads : cmd -> list (cmd * summary) -> Prop :=
 | StPar : forall c1 c2 ss1 ss2,
     summarizeThreads c1 ss1
@@ -411,7 +493,10 @@ Inductive summarizeThreads : cmd -> list (cmd * summary) -> Prop :=
 | StAtomic : forall c s,
     summarize c s
     -> summarizeThreads c [(c, s)].
+(* We will use these expanded lists as the command type in the new semantics. *)
 
+(* To check commutativity, it is helpful to know which atomic command a thread
+ * could run next. *)
 Inductive nextAction : cmd -> cmd -> Prop :=
 | NaReturn : forall r,
     nextAction (Return r) (Return r)
@@ -429,6 +514,10 @@ Inductive nextAction : cmd -> cmd -> Prop :=
     nextAction c1 c
     -> nextAction (Bind c1 c2) c.
 
+(* We can succinctly capture which summaries describe threads that will commute
+ * with a particular atomic action.  The guarantee applies not just to the
+ * thread's first action but also to all others that it might reach later in
+ * execution. *)
 Definition commutes (c : cmd) (s : summary) : Prop :=
   match c with
   | Return _ => True
@@ -440,24 +529,48 @@ Definition commutes (c : cmd) (s : summary) : Prop :=
   | _ => False
   end.
 
+(* Now the new semantics: *)
 Inductive stepC : heap * locks * list (cmd * summary) -> heap * locks * list (cmd * summary) -> Prop :=
+
+(* It is always OK to let the first thread run. *)
 | StepFirst : forall h l c h' l' c' s cs,
   step (h, l, c) (h', l', c')
   -> stepC (h, l, (c, s) :: cs) (h', l', (c', s) :: cs)
+
+(* However, you may only pick another thread to run if it would be unsound to
+ * consider just the first thread.  The negation of the soundness condition is
+ * expressed in the first premise below. *)
 | StepAny : forall h l c h' l' s cs1 c1 s1 cs2 c1',
   (forall c0 h'' l'' c'', nextAction c c0
+                          (* The first thread [c] has some atomic action [c0]
+                           * ready to run. *)
                           -> List.Forall (fun c_s => commutes c0 (snd c_s)) (cs1 ++ (c1, s1) :: cs2)
+                          (* All other threads only contain actiosn that commute
+                           * with [c0]. *)
+
                           -> step (h, l, c) (h'', l'', c'')
+                          (* Finaly, [c] is actually enabled to run, which might
+                           * not be the case if [c0] is a locking command. *)
+
                           -> False)
+
+  (* If we passed that check, then we can step a single thread as expected! *)
   -> step (h, l, c1) (h', l', c1')
   -> stepC (h, l, (c, s) :: cs1 ++ (c1, s1) :: cs2) (h', l', (c, s) :: cs1 ++ (c1', s1) :: cs2).
 
+(* Notice how this definition turns the partial-order-reduction optimization
+ * "off and on" during state-space exploration.  We only restrict our attention
+ * to the first thread so long as the soundness condition above is true. *)
+
 Definition trsys_ofC (h : heap) (l : locks) (cs : list (cmd * summary)) := {|
   Initial := {(h, l, cs)};
   Step := stepC
 |}.
 
 
+(* Now we come to quite a few fairly complex lemmas.
+ * First, [commutes] really does allow other commands to swap order with the
+ * atomic action in question. *)
 Lemma commutes_sound' : forall h l c2 h' l' c2',
   step (h, l, c2) (h', l', c2')
   -> forall s c1 h'' l'' c1', step (h', l', c1) (h'', l'', c1')
@@ -519,6 +632,8 @@ Proof.
   sets.
 Qed.
 
+(* Commentary now resumes at [commutes_sound]. *)
+
 Lemma step_nextAction_Return : forall r h l c h' l' c',
     step (h, l, c) (h', l', c')
     -> nextAction c (Return r)
@@ -568,6 +683,8 @@ Proof.
   induct 1; auto.
 Qed.
 
+(* [commutes] allows order-swapping even when the atomic action is embedded
+ * further within the structure of a larger command. *)
 Lemma commutes_sound : forall h l c2 h' l' c2',
   step (h, l, c2) (h', l', c2')
   -> forall s c1 c0 h'' l'' c1', step (h', l', c1) (h'', l'', c1')
@@ -609,6 +726,9 @@ Qed.
 
 Hint Constructors summarize.
 
+(* The next two lemmas show that, once a summary is accurate for a command, it
+ * remains accurate throughout the whole execution lifetime of the command. *)
+
 Lemma summarize_step : forall h l c h' l' c' s,
   step (h, l, c) (h', l', c')
   -> summarize c s
@@ -628,6 +748,11 @@ Proof.
   eauto using summarize_step.
 Qed.
 
+(* The next technical device will require that we bound how many steps of
+ * execution particular commands could run for.  We use a conservative
+ * overapproximation that is easy to compute, phrased as a relation.
+ * Yes, it is time to get scared, as we must define exponentiation to compute
+ * large enough time bounds! *)
 Fixpoint pow2 (n : nat) : nat :=
   match n with
   | O => 1
@@ -656,6 +781,8 @@ Inductive boundRunningTime : cmd -> nat -> Prop :=
     -> boundRunningTime c2 n2
     -> boundRunningTime (Par c1 c2) (pow2 (n1 + n2)).
 
+(* Perhaps surprisingly, there exist commands that have no finite time bounds!
+ * Mixed-embedding languages often have these counterintuitive properties. *)
 Theorem boundRunningTime_not_total : exists c, forall n, ~boundRunningTime c n.
 Proof.
   Fixpoint scribbly (n : nat) : cmd :=
@@ -682,6 +809,8 @@ Proof.
   linear_arithmetic.
 Qed.
 
+(* Next, some boring properties of [pow2]. *)
+
 Lemma pow2_pos : forall n,
     pow2 n > 0.
 Proof.
@@ -737,6 +866,7 @@ Qed.
 
 Hint Constructors boundRunningTime.
 
+(* Key property: taking a step of execution lowers the running-time bound. *)
 Lemma boundRunningTime_step : forall c n h l h' l',
     boundRunningTime c n
     -> forall c', step (h, l, c) (h', l', c')
@@ -756,8 +886,34 @@ Proof.
   eauto 6.
 Qed.
 
+Lemma boundRunningTime_steps : forall h l c h' l' c',
+    step^* (h, l, c) (h', l', c')
+    -> forall n, boundRunningTime c n
+    -> exists n', boundRunningTime c' n' /\ n' <= n.
+Proof.
+  induct 1; simplify; eauto.
+  cases y.
+  cases p.
+  specialize (boundRunningTime_step H1 H); first_order.
+  eapply IHtrc in H2; eauto.
+  first_order.
+  eauto.
+Qed.
+
+(* Here we get a bit naughty and begin to depend on *classical logic*, as with
+ * the *law of the excluded middle*: [forall P, P \/ ~P].  You may not have
+ * noticed that we've never applied that principle explicitly so far! *)
 Require Import Classical.
 
+(* A very useful property: when a command has bounded running time, any
+ * execution starting from that command can be *completed* to one ending in a
+ * stuck state.  This property definitely wouldn't be true without the bound,
+ * if our language had explicit, unbounded loops.
+ *
+ * The fun thing about this proof is that we are essentially using tactics to
+ * define an interpreter for the object language, making arbitrary scheduling
+ * choices.  Implicit in the derivation is a proof that this interpreter always
+ * terminates, which we get by strong induction on the running-time bound. *)
 Theorem complete_trace : forall k c n,
   boundRunningTime c n
   -> n <= k
@@ -898,6 +1054,9 @@ Proof.
   eauto.
 Qed.
 
+(* We will apply completion to traces that end in violation of the
+ * not-about-to-fail invariant.  It is important that any extension of such a
+ * trace preserves that property. *)
 Lemma notAboutToFail_step : forall h l c h' l' c',
     step (h, l, c) (h', l', c')
     -> notAboutToFail c = false
@@ -925,20 +1084,8 @@ Proof.
   eauto using notAboutToFail_step.
 Qed.
 
-Lemma boundRunningTime_steps : forall h l c h' l' c',
-    step^* (h, l, c) (h', l', c')
-    -> forall n, boundRunningTime c n
-    -> exists n', boundRunningTime c' n' /\ n' <= n.
-Proof.
-  induct 1; simplify; eauto.
-  cases y.
-  cases p.
-  specialize (boundRunningTime_step H1 H); first_order.
-  eapply IHtrc in H2; eauto.
-  first_order.
-  eauto.
-Qed.
-
+(* One last technical device: we define a variant of [step^*] that tracks how
+ * many steps were made, which will come in handy for induction shortly. *)
 Inductive stepsi : nat -> heap * locks * cmd -> heap * locks * cmd -> Prop :=
 | StepsiO : forall st,
     stepsi O st st
@@ -956,6 +1103,8 @@ Proof.
   induct 1; first_order; eauto.
 Qed.
 
+(* Some helper lemmas about Coq's quantification over lists *)
+
 Lemma Exists_app_fwd : forall A (P : A -> Prop) ls1 ls2,
     Exists P (ls1 ++ ls2)
     -> Exists P ls1 \/ Exists P ls2.
@@ -973,6 +1122,42 @@ Proof.
   invert H0; eauto.
 Qed.
 
+Lemma Forall_app_fwd1 : forall A (P : A -> Prop) ls1 ls2,
+    Forall P (ls1 ++ ls2)
+    -> Forall P ls1.
+Proof.
+  induct ls1; invert 1; eauto.
+Qed.
+
+Lemma Forall_app_fwd2 : forall A (P : A -> Prop) ls1 ls2,
+    Forall P (ls1 ++ ls2)
+    -> Forall P ls2.
+Proof.
+  induct ls1; invert 1; simplify; subst; eauto.
+Qed.
+
+Hint Immediate Forall_app_fwd1 Forall_app_fwd2.
+
+Lemma Forall_app_bwd : forall A (P : A -> Prop) ls1 ls2,
+    Forall P ls1
+    -> Forall P ls2
+    -> Forall P (ls1 ++ ls2).
+Proof.
+  induct 1; simplify; eauto.
+Qed.
+
+Hint Resolve Forall_app_bwd.
+
+Lemma Forall2 : forall A (P Q R : A -> Prop) ls,
+    Forall P ls
+    -> Forall Q ls
+    -> (forall x, P x -> Q x -> R x)
+    -> Forall R ls.
+Proof.
+  induct 1; invert 1; eauto.
+Qed.
+
+(* A connection between [notAboutToFail] in the old and new worlds *)
 Lemma summarizeThreads_aboutToFail : forall c cs,
     summarizeThreads c cs
     -> notAboutToFail c = false
@@ -998,6 +1183,8 @@ Hint Immediate summarizeThreads_nonempty.
 
 Hint Constructors stepC summarizeThreads.
 
+(* When we step a summarized thread, we can duplicate the step within one of the
+ * elements of the summary. *)
 Lemma step_pick : forall h l c h' l' c',
     step (h, l, c) (h', l', c')
     -> forall cs, summarizeThreads c cs
@@ -1047,7 +1234,10 @@ Proof.
   exact l'.
   exact h'.
 Qed.
-  
+
+(* The next few lemmas are quite technical.  Commentary resumes for
+ * [translate_trace]. *)
+
 Lemma translate_trace_matching : forall h l c h' l' c',
     step (h, l, c) (h', l', c')
     -> forall c0 s cs, summarizeThreads c ((c0, s) :: cs)
@@ -1135,22 +1325,6 @@ Proof.
   linear_arithmetic.
 Qed.
 
-Lemma Forall_app_fwd1 : forall A (P : A -> Prop) ls1 ls2,
-    Forall P (ls1 ++ ls2)
-    -> Forall P ls1.
-Proof.
-  induct ls1; invert 1; eauto.
-Qed.
-
-Lemma Forall_app_fwd2 : forall A (P : A -> Prop) ls1 ls2,
-    Forall P (ls1 ++ ls2)
-    -> Forall P ls2.
-Proof.
-  induct ls1; invert 1; simplify; subst; eauto.
-Qed.
-
-Hint Immediate Forall_app_fwd1 Forall_app_fwd2.
-
 Lemma commute_writes : forall c1 c a s h l1' h' l' v,
   nextAction c1 c
   -> a \in Writes s
@@ -1314,16 +1488,6 @@ Proof.
   eauto.
 Qed.
 
-Lemma Forall_app_bwd : forall A (P : A -> Prop) ls1 ls2,
-    Forall P ls1
-    -> Forall P ls2
-    -> Forall P (ls1 ++ ls2).
-Proof.
-  induct 1; simplify; eauto.
-Qed.
-
-Hint Resolve Forall_app_bwd.
-
 Lemma split_app : forall A (l1 l2 r1 r2 : list A),
     l1 ++ l2 = r1 ++ r2
     -> (exists r12, r1 = l1 ++ r12
@@ -1444,15 +1608,12 @@ Proof.
   induct 1; eauto.
 Qed.
 
-Lemma Forall2 : forall A (P Q R : A -> Prop) ls,
-    Forall P ls
-    -> Forall Q ls
-    -> (forall x, P x -> Q x -> R x)
-    -> Forall R ls.
-Proof.
-  induct 1; invert 1; eauto.
-Qed.
-
+(* The heart of the soundness proof!  When a length-[i] derivation gets us to a
+ * stuck state that is about to fail, and when we have summarized the program,
+ * we can run that summary in the optimized semantics and also arrive at a state
+ * that is about to fail.  Thus, if we explore the optimized state space and
+ * find no failures, we can conclude lack of reachable failures in the original
+ * state space. *)
 Lemma translate_trace : forall i h l c h' l' c',
     stepsi i (h, l, c) (h', l', c')
     -> (forall h'' l'' c'', step (h', l', c') (h'', l'', c'') -> False)
@@ -1505,6 +1666,8 @@ Proof.
   induct 1; invert 1; equality.
 Qed.
 
+(* This theorem brings it all together, to reduce one invariant-proof problem to
+ * another that uses the optimized semantics. *)
 Theorem step_stepC : forall h l c (cs : list (cmd * summary)) n,
   summarizeThreads c cs
   -> boundRunningTime c n
@@ -1542,6 +1705,9 @@ Proof.
   assumption.
 Qed.
 
+(* Now we define some tactics to help us apply this technique automatically for
+ * concrete programs.  As usual, we won't explain how the tactics work. *)
+
 Ltac analyzer := repeat (match goal with
                          | [ |- context[if ?E then _ else _] ] => cases E
                          | _ => econstructor
@@ -1574,11 +1740,16 @@ Ltac por_step :=
 Ltac por_done :=
   apply MscDone; eapply oneStepClosure_solve; [ por_closure | simplify; solve [ sets ] ].
 
+(* OK, ready to return to our last example!  This time we will see state-space
+ * exploration that steps a single thread at a time, where the final invariant
+ * includes no states with multiple *partially executed* threads. *)
 Theorem independent_threads_ok_again :
   invariantFor (trsys_of $0 {} independent_threads)
                (fun p => let '(_, _, c) := p in
                          notAboutToFail c = true).
 Proof.
+  (* We need to supply that summary when invoking the proof principle, though we
+   * could also have used Ltac to compute it automatically. *)
   eapply step_stepC with (cs := [(_, {| Reads := {0, 1};
                                         Writes := {1};
                                         Locks := {} |})]
@@ -1606,6 +1777,8 @@ Proof.
 
   sets.
 
+  (* We computed an inexact running time.  By filling in zeroes for some
+   * existential variables, we commit to a concrete bound. *)
   Grab Existential Variables.
   exact 0.
   exact 0.