From 6c1af44f9502d267255437293959babcac8480e2 Mon Sep 17 00:00:00 2001
From: Adam Chlipala <adam@chlipala.net>
Date: Sun, 19 Mar 2017 18:07:21 -0400
Subject: [PATCH] CompilerCorrectness chapter: simulation with skipping, after
 adding termination as an observable

---
 CompilerCorrectness.v | 135 +++++++++++++++++++++++++++++++++++++++---
 frap_book.tex         |  97 ++++++++++++++++++++++++++++--
 2 files changed, 221 insertions(+), 11 deletions(-)

diff --git a/CompilerCorrectness.v b/CompilerCorrectness.v
index b0e6413..a642c4e 100644
--- a/CompilerCorrectness.v
+++ b/CompilerCorrectness.v
@@ -109,10 +109,14 @@ Inductive cstep : valuation * cmd -> option nat -> valuation * cmd -> Prop :=
 
 (* To characterize correct compilation, it is helpful to define a relation to
  * capture which output _traces_ a command might generate.  Note that, for us, a
- * trace is a list of output values, where [None] labels are simply dropped. *)
-Inductive generate : valuation * cmd -> list nat -> Prop :=
+ * trace is a list of output values and/or terminating markers.  We drop silent
+ * labels as we run, and we use [Some n] for outputting of [n] and [None] for
+ * termination. *)
+Inductive generate : valuation * cmd -> list (option nat) -> Prop :=
 | GenDone : forall vc,
   generate vc []
+| GenSkip : forall v,
+  generate (v, Skip) [None]
 | GenSilent : forall vc vc' ns,
   cstep vc None vc'
   -> generate vc' ns
@@ -120,7 +124,7 @@ Inductive generate : valuation * cmd -> list nat -> Prop :=
 | GenOutput : forall vc n vc' ns,
   cstep vc (Some n) vc'
   -> generate vc' ns
-  -> generate vc (n :: ns).
+  -> generate vc (Some n :: ns).
 
 Hint Constructors plug step0 cstep generate.
 
@@ -178,7 +182,7 @@ Hint Extern 1 (interp _ _ = _) => simplify; congruence.
 Hint Extern 1 (interp _ _ <> _) => simplify; congruence.
 
 Theorem first_few_values :
-  generate ($0, month_boundaries_in_days) [28; 56].
+  generate ($0, month_boundaries_in_days) [Some 28; Some 56].
 Proof.
   unfold month_boundaries_in_days.
   eapply GenSilent.
@@ -385,6 +389,10 @@ Section simulation.
   Proof.
     induct 1; simplify; eauto.
 
+    cases vc2.
+    apply agree_on_termination in H; subst.
+    auto.
+
     eapply one_step in H; eauto.
     first_order.
     eauto.
@@ -409,6 +417,16 @@ Section simulation.
   Proof.
     induct 1; simplify; eauto.
 
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    auto.
+    eapply one_step in H; eauto.
+    first_order.
+    invert H.
+    invert H4.
+    invert H5.
+    
     cases vc1; cases vc.
     assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
     first_order; subst.
@@ -538,6 +556,10 @@ Proof.
   induct 1; simplify; eauto.
   invert H1; auto.
 
+  invert H.
+  invert H3.
+  invert H4.
+
   eapply deterministic in H; eauto.
   propositional; subst.
   auto.
@@ -555,7 +577,7 @@ Proof.
 Qed.
 
 Lemma generate_Skip : forall v a ns,
-    generate (v, Skip) (a :: ns)
+    generate (v, Skip) (Some a :: ns)
     -> False.
 Proof.
   induct 1; simplify.
@@ -605,6 +627,11 @@ Section simulation_skipping.
   Proof.
     induct 1; simplify; eauto.
 
+    cases vc2.
+    apply agree_on_termination in H.
+    subst.
+    auto.
+
     eapply one_step in H; eauto.
     first_order.
     eauto.
@@ -664,12 +691,57 @@ Section simulation_skipping.
     eauto 6.
   Qed.
 
+  Lemma step_to_termination : forall vc v,
+      silent_cstep^* vc (v, Skip)
+      -> generate vc [None].
+  Proof.
+    clear; induct 1; eauto.
+  Qed.
+
+  Hint Resolve step_to_termination.
+
+  Lemma R_Skip : forall n vc1 v,
+      R n vc1 (v, Skip)
+      -> exists v', silent_cstep^* vc1 (v', Skip).
+  Proof.
+    induct n; simplify.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    eauto.
+    eapply one_step in H; eauto.
+    first_order.
+    equality.
+    invert H.
+    invert H4.
+    invert H5.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    eauto.
+    eapply one_step in H; eauto.
+    first_order; subst.
+    invert H.
+    apply IHn in H2.
+    first_order.
+    eauto.
+    invert H.
+    invert H4.
+    invert H5.
+  Qed.
+
   Lemma simulation_skipping_bwd' : forall ns vc2, generate vc2 ns
     -> forall n vc1, R n vc1 vc2
       -> generate vc1 ns.
   Proof.
     induct 1; simplify; eauto.
 
+    cases vc1.
+    apply R_Skip in H; first_order.
+    eauto.
+
     eapply match_step in H1; eauto.
     first_order.
     eauto.
@@ -768,6 +840,15 @@ Qed.
 
 Hint Resolve plug_countIfs.
 
+Hint Extern 1 (interp ?e _ = _) =>
+  match goal with
+  | [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
+  end.
+Hint Extern 1 (interp ?e _ <> _) =>
+  match goal with
+  | [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
+  end.
+
 (* The final proof is fairly straightforward now. *)
 Lemma cfold_ok : forall v c,
     (v, c) =| (v, cfold c).
@@ -996,6 +1077,10 @@ Section simulation_multiple.
   Proof.
     induct 1; simplify; eauto.
 
+    cases vc2.
+    apply agree_on_termination in H; subst.
+    auto.
+
     eapply one_step in H; eauto.
     first_order.
     eauto.
@@ -1014,9 +1099,11 @@ Section simulation_multiple.
   (* The backward proof essentially proceeds by strong induction on
    * _how many steps it took to generate a trace_, which we facilitate by
    * defining a [generate] variant parameterized by a step count. *)
-  Inductive generateN : nat -> valuation * cmd -> list nat -> Prop :=
+  Inductive generateN : nat -> valuation * cmd -> list (option nat) -> Prop :=
   | GenDoneN : forall vc,
       generateN 0 vc []
+  | GenSkupN : forall v,
+      generateN 0 (v, Skip) [None]
   | GenSilentN : forall sc vc vc' ns,
       cstep vc None vc'
       -> generateN sc vc' ns
@@ -1024,7 +1111,7 @@ Section simulation_multiple.
   | GenOutputN : forall sc vc n vc' ns,
       cstep vc (Some n) vc'
       -> generateN sc vc' ns
-      -> generateN (S sc) vc (n :: ns).
+      -> generateN (S sc) vc (Some n :: ns).
 
   (* We won't comment on the other proof details, though they could be
    * interesting reading. *)
@@ -1054,6 +1141,11 @@ Section simulation_multiple.
   Proof.
     clear; induct 1; simplify; eauto.
 
+    invert H; eauto.
+    invert H0.
+    invert H3.
+    invert H4.
+
     invert H1; eauto.
     eapply deterministic in H; eauto.
     propositional; subst.
@@ -1066,6 +1158,13 @@ Section simulation_multiple.
     equality.
   Qed.
 
+  Lemma termination_is_last : forall sc vc ns,
+      generateN sc vc (None :: ns)
+      -> ns = [].
+  Proof.
+    induct 1; eauto.
+  Qed.
+
   Lemma simulation_multiple_bwd' : forall sc sc', sc' < sc
     -> forall vc2 ns, generateN sc' vc2 ns
     -> forall vc1, R vc1 vc2
@@ -1078,17 +1177,39 @@ Section simulation_multiple.
     cases sc'.
     invert H0.
     auto.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    auto.
+    eapply one_step in H1; eauto.
+    first_order.
+    invert H1.
+    invert H2.
+    invert H5.
+    invert H6.
+    invert H4.
+    invert H7.
+    invert H8.
+
     cases vc1; cases vc2.
     assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
     first_order; subst.
     apply agree_on_termination in H1; subst.
     cases ns; auto.
+    cases o.
     exfalso; eauto.
+    eapply termination_is_last in H0; subst.
+    auto.
+
     eapply one_step in H1; eauto.
     first_order.
     eapply generateN_silent_cstep in H0; eauto.
     first_order.
     invert H5; auto.
+    invert H3.
+    invert H7.
+    invert H8.
     eapply deterministic in H3; eauto.
     propositional; subst.
     econstructor.
diff --git a/frap_book.tex b/frap_book.tex
index 83d4afc..af81572 100644
--- a/frap_book.tex
+++ b/frap_book.tex
@@ -2311,17 +2311,19 @@ $$\infer{\smallstepcl{(v, C[c])}{\ell}{(v', C[c'])}}{
 }$$
 
 \newcommand{\Tr}[1]{\mathsf{Tr}(#1)}
+\newcommand{\terminate}[0]{\mathsf{terminate}}
 
 To reason about infinite executions, we need a new abstraction, compared to what has worked in our invariant-based proofs so far.
-That abstraction will be \emph{traces}\index{traces}, sequences of outputs that a program might be observed to generate.
+That abstraction will be \emph{traces}\index{traces}, sequences of outputs (and termination events) that a program might be observed to generate.
 We define a command's trace set inductively.
 Recall that $\cdot$ is the empty list, while $\bowtie$ does list concatenation.
 $$\infer{\cdot \in \Tr{s}}{}
+\quad \infer{\terminate \in \Tr{(v, \skipe)}}{}
 \quad \infer{t \in \Tr{s}}{
   \smallstepcl{s}{\silent}{s'}
   & t \in \Tr{s'}
 }
-\quad \infer{\concat{n}{t} \in \Tr{s}}{
+\quad \infer{\concat{\outp{n}}{t} \in \Tr{s}}{
   \smallstepcl{s}{n}{s'}
   & t \in \Tr{s'}
 }$$
@@ -2385,7 +2387,7 @@ Instead of showing that every step preserves a one-state predicate, we show that
 The simulation approach is as general for relating programs as the invariant approach is for verifying individual programs.
 
 \begin{theorem}
-  If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
+  \label{simulation_ok}If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
 \end{theorem}
 \begin{proof}
   We prove the two trace-inclusion directions separately.
@@ -2405,7 +2407,7 @@ The simulation approach is as general for relating programs as the invariant app
 We can apply this very general principle to constant folding.
 
 \begin{theorem}
-  For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
+  \label{cfold_ok}For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
 \end{theorem}
 \begin{proof}
   By a simulation argument using this relation:
@@ -2418,6 +2420,93 @@ We can apply this very general principle to constant folding.
 \end{proof}
 
 
+\section{Simulations That Allow Skipping Steps}
+
+\newcommand{\cfoldt}[1]{\mathsf{cfold}_2(#1)}
+
+Consider an evolution of our constant-folding optimization to take advantage of known values of $\mathsf{if}$ test expressions.
+Depending on whether the value is zero, we can replace the whole $\mathsf{if}$ with one of its two cases.
+We will write $\cfoldt{c}$ for this expanded optimization and work up to proving it sound, too.
+However, we can no longer use last section's definition of simulation!
+The reason is that optimizations intentionally cut down on steps that a program needs to execute.
+Some steps of the source program now have no matching steps of the target program, say when we are stepping an $\mathsf{if}$ whose test expression had a known value.
+
+Let's take a first crack at making simulation more flexible.
+
+\begin{definition}[Simulation relation with skipping (\emph{faulty} version!)]
+  We say that binary relation $R$ over states of our object language is a \emph{simulation relation with skipping} iff:
+  \begin{enumerate}
+    \item Whenever $(v_1, \skipe) \; R \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
+    \item Whenever $s_1 \; R \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
+      \begin{enumerate}
+        \item there exists $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R \; s'_2$,
+        \item or $\ell = \silent$ and $s'_1 \; R \; s_2$.
+      \end{enumerate}
+  \end{enumerate}
+\end{definition}
+
+In other words, to match a silent step, it suffices to do nothing, so long as $R$ still holds afterward.
+
+\newcommand{\addad}[1]{\mathsf{withAds}(#1)}
+
+We didn't mark the definition as \emph{faulty} for nothing.
+It actually does not imply trace equivalence.
+Consider a questionable ``optimization'' defined as $\addad{\while{1}{\skipe}} = \while{1}{\outp{0}}$, and $\addad{c} = c$ for all other $c$.
+It adds a little extra advertisement into a particular infinite loop.
+Now we define a candidate simulation relation.
+\begin{eqnarray*}
+  (v_1, c_1) \; R \; (v_2, c_2) &=& v_1 = v_2 \land c_1 \in \{\while{1}{\skipe}, (\skipe; \while{1}{\skipe})\}
+\end{eqnarray*}
+This suspicious relation records nothing about $c_2$.
+The $\skipe$ condition of simulations is handled trivially, as we can see by inspection that $R$ does not allow $c_1$ to be $\skipe$.
+Checking the execution-matching condition of simulations, $c_1$ is either $\while{1}{\skipe}$ or $(\skipe; \while{1}{\skipe})$, each of which steps silently to the other.
+We may match either step by keeping $c_2$ in place, as $R$ does not constrain $c_2$ at all.
+Thus, $R$ is a simulation relation with skipping, and, for $c = \while{1}{\skipe}$, it relates $c$ to $\addad{c}$.
+
+From here we expect to conclude trace equivalence.
+However, clearly $\mathsf{withAds}$ can turn a program that never outputs into a program that outputs infinitely often!
+
+Let's patch our definition.
+
+\begin{definition}[Simulation relation with skipping]
+  We say that an $\mathbb N$-indexed family of binary relations $R_n$ over states of our object language is a \emph{simulation relation with skipping} iff:
+  \begin{enumerate}
+    \item Whenever $(v_1, \skipe) \; R_n \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
+    \item Whenever $s_1 \; R_n \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
+      \begin{enumerate}
+        \item there exist $n'$ and $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R_{n'} \; s'_2$,
+        \item or $n > 0$, $\ell = \silent$, and $s'_1 \; R_{n-1} \; s_2$.
+      \end{enumerate}
+  \end{enumerate}
+\end{definition}
+
+This new version imposes a finite limit $n$ at any point, on how many times the righthand side may match lefthand steps without stepping itself.
+Our bad counterexample fails to satisfy the conditions, because eventually the starting step count $n$ will be used up, and the incorrect ``optimized'' program will be forced to reveal itself by taking a step that outputs.
+
+\begin{theorem}
+  If there exists a simulation with skipping $R$ such that $s_1 \; R_n \; s_2$, then $\treq{s_1}{s_2}$.
+\end{theorem}
+\begin{proof}
+  The proof is fairly similar to that of Theorem \ref{simulation_ok}.
+  To show termination preservation in the backward direction, we find ourselves proving a lemma by induction on $n$.
+\end{proof}
+
+\newcommand{\countIfs}[1]{\mathsf{countIfs}(#1)}
+
+\begin{theorem}
+  For any $v$ and $c$, $\treq{(v, c)}{(v, \cfoldt{c})}$.
+\end{theorem}
+\begin{proof}
+  By a simulation argument (with skipping) using this relation:
+  \begin{eqnarray*}
+    (v_1, c_1) \; R_n \; (v_2, c_2) &=& v_1 = v_2 \land c_2 = \cfoldt{c_1} \land \countIfs{c_1} < n
+  \end{eqnarray*}
+  We rely on a simple helper function $\countIfs{c}$ to count how many $\mathsf{If}$ nodes appear in the syntax of $c$.
+  This notion turns out to be a conservative upper bound on how many times in a row we will need to let lefthand steps go unmatched on the right.
+  The rest of the proof proceeds essentially the same way as in Theorem \ref{cfold_ok}.
+\end{proof}
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \chapter{Lambda Calculus and Simple Type Safety}