CompilerCorrectness chapter: simulation with skipping, after adding termination as an observable

2025-03-23 12:34:00 +00:00 · 2017-03-19 18:07:21 -04:00 · 2017-03-19 18:07:21 -04:00 · 6c1af44f95
commit 6c1af44f95
parent 9974e130f0
2 changed files with 221 additions and 11 deletions
--- a/CompilerCorrectness.v
+++ b/CompilerCorrectness.v
@ -109,10 +109,14 @@ Inductive cstep : valuation * cmd -> option nat -> valuation * cmd -> Prop :=

 (* To characterize correct compilation, it is helpful to define a relation to
 * capture which output _traces_ a command might generate.  Note that, for us, a
- * trace is a list of output values, where [None] labels are simply dropped. *)
-Inductive generate : valuation * cmd -> list nat -> Prop :=
+ * trace is a list of output values and/or terminating markers.  We drop silent
+ * labels as we run, and we use [Some n] for outputting of [n] and [None] for
+ * termination. *)
+Inductive generate : valuation * cmd -> list (option nat) -> Prop :=
 | GenDone : forall vc,
  generate vc []
+| GenSkip : forall v,
+  generate (v, Skip) [None]
 | GenSilent : forall vc vc' ns,
  cstep vc None vc'
  -> generate vc' ns
@ -120,7 +124,7 @@ Inductive generate : valuation * cmd -> list nat -> Prop :=
 | GenOutput : forall vc n vc' ns,
  cstep vc (Some n) vc'
  -> generate vc' ns
-  -> generate vc (n :: ns).
+  -> generate vc (Some n :: ns).

 Hint Constructors plug step0 cstep generate.

@ -178,7 +182,7 @@ Hint Extern 1 (interp _ _ = _) => simplify; congruence.
 Hint Extern 1 (interp _ _ <> _) => simplify; congruence.

 Theorem first_few_values :
-  generate ($0, month_boundaries_in_days) [28; 56].
+  generate ($0, month_boundaries_in_days) [Some 28; Some 56].
 Proof.
  unfold month_boundaries_in_days.
  eapply GenSilent.
@ -385,6 +389,10 @@ Section simulation.
  Proof.
    induct 1; simplify; eauto.

+    cases vc2.
+    apply agree_on_termination in H; subst.
+    auto.
+
    eapply one_step in H; eauto.
    first_order.
    eauto.
@ -409,6 +417,16 @@ Section simulation.
  Proof.
    induct 1; simplify; eauto.

+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    auto.
+    eapply one_step in H; eauto.
+    first_order.
+    invert H.
+    invert H4.
+    invert H5.
+    
    cases vc1; cases vc.
    assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
    first_order; subst.
@ -538,6 +556,10 @@ Proof.
  induct 1; simplify; eauto.
  invert H1; auto.

+  invert H.
+  invert H3.
+  invert H4.
+
  eapply deterministic in H; eauto.
  propositional; subst.
  auto.
@ -555,7 +577,7 @@ Proof.
 Qed.

 Lemma generate_Skip : forall v a ns,
-    generate (v, Skip) (a :: ns)
+    generate (v, Skip) (Some a :: ns)
    -> False.
 Proof.
  induct 1; simplify.
@ -605,6 +627,11 @@ Section simulation_skipping.
  Proof.
    induct 1; simplify; eauto.

+    cases vc2.
+    apply agree_on_termination in H.
+    subst.
+    auto.
+
    eapply one_step in H; eauto.
    first_order.
    eauto.
@ -664,12 +691,57 @@ Section simulation_skipping.
    eauto 6.
  Qed.

+  Lemma step_to_termination : forall vc v,
+      silent_cstep^* vc (v, Skip)
+      -> generate vc [None].
+  Proof.
+    clear; induct 1; eauto.
+  Qed.
+
+  Hint Resolve step_to_termination.
+
+  Lemma R_Skip : forall n vc1 v,
+      R n vc1 (v, Skip)
+      -> exists v', silent_cstep^* vc1 (v', Skip).
+  Proof.
+    induct n; simplify.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    eauto.
+    eapply one_step in H; eauto.
+    first_order.
+    equality.
+    invert H.
+    invert H4.
+    invert H5.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    eauto.
+    eapply one_step in H; eauto.
+    first_order; subst.
+    invert H.
+    apply IHn in H2.
+    first_order.
+    eauto.
+    invert H.
+    invert H4.
+    invert H5.
+  Qed.
+
  Lemma simulation_skipping_bwd' : forall ns vc2, generate vc2 ns
    -> forall n vc1, R n vc1 vc2
      -> generate vc1 ns.
  Proof.
    induct 1; simplify; eauto.

+    cases vc1.
+    apply R_Skip in H; first_order.
+    eauto.
+
    eapply match_step in H1; eauto.
    first_order.
    eauto.
@ -768,6 +840,15 @@ Qed.

 Hint Resolve plug_countIfs.

+Hint Extern 1 (interp ?e _ = _) =>
+  match goal with
+  | [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
+  end.
+Hint Extern 1 (interp ?e _ <> _) =>
+  match goal with
+  | [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
+  end.
+
 (* The final proof is fairly straightforward now. *)
 Lemma cfold_ok : forall v c,
    (v, c) =| (v, cfold c).
@ -996,6 +1077,10 @@ Section simulation_multiple.
  Proof.
    induct 1; simplify; eauto.

+    cases vc2.
+    apply agree_on_termination in H; subst.
+    auto.
+
    eapply one_step in H; eauto.
    first_order.
    eauto.
@ -1014,9 +1099,11 @@ Section simulation_multiple.
  (* The backward proof essentially proceeds by strong induction on
   * _how many steps it took to generate a trace_, which we facilitate by
   * defining a [generate] variant parameterized by a step count. *)
-  Inductive generateN : nat -> valuation * cmd -> list nat -> Prop :=
+  Inductive generateN : nat -> valuation * cmd -> list (option nat) -> Prop :=
  | GenDoneN : forall vc,
      generateN 0 vc []
+  | GenSkupN : forall v,
+      generateN 0 (v, Skip) [None]
  | GenSilentN : forall sc vc vc' ns,
      cstep vc None vc'
      -> generateN sc vc' ns
@ -1024,7 +1111,7 @@ Section simulation_multiple.
  | GenOutputN : forall sc vc n vc' ns,
      cstep vc (Some n) vc'
      -> generateN sc vc' ns
-      -> generateN (S sc) vc (n :: ns).
+      -> generateN (S sc) vc (Some n :: ns).

  (* We won't comment on the other proof details, though they could be
   * interesting reading. *)
@ -1054,6 +1141,11 @@ Section simulation_multiple.
  Proof.
    clear; induct 1; simplify; eauto.

+    invert H; eauto.
+    invert H0.
+    invert H3.
+    invert H4.
+
    invert H1; eauto.
    eapply deterministic in H; eauto.
    propositional; subst.
@ -1066,6 +1158,13 @@ Section simulation_multiple.
    equality.
  Qed.

+  Lemma termination_is_last : forall sc vc ns,
+      generateN sc vc (None :: ns)
+      -> ns = [].
+  Proof.
+    induct 1; eauto.
+  Qed.
+
  Lemma simulation_multiple_bwd' : forall sc sc', sc' < sc
    -> forall vc2 ns, generateN sc' vc2 ns
    -> forall vc1, R vc1 vc2
@ -1078,17 +1177,39 @@ Section simulation_multiple.
    cases sc'.
    invert H0.
    auto.
+
+    cases vc1.
+    assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
+    first_order; subst.
+    auto.
+    eapply one_step in H1; eauto.
+    first_order.
+    invert H1.
+    invert H2.
+    invert H5.
+    invert H6.
+    invert H4.
+    invert H7.
+    invert H8.
+
    cases vc1; cases vc2.
    assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
    first_order; subst.
    apply agree_on_termination in H1; subst.
    cases ns; auto.
+    cases o.
    exfalso; eauto.
+    eapply termination_is_last in H0; subst.
+    auto.
+
    eapply one_step in H1; eauto.
    first_order.
    eapply generateN_silent_cstep in H0; eauto.
    first_order.
    invert H5; auto.
+    invert H3.
+    invert H7.
+    invert H8.
    eapply deterministic in H3; eauto.
    propositional; subst.
    econstructor.
--- a/frap_book.tex
+++ b/frap_book.tex
@ -2311,17 +2311,19 @@ $$\infer{\smallstepcl{(v, C[c])}{\ell}{(v', C[c'])}}{
 }$$

 \newcommand{\Tr}[1]{\mathsf{Tr}(#1)}
+\newcommand{\terminate}[0]{\mathsf{terminate}}

 To reason about infinite executions, we need a new abstraction, compared to what has worked in our invariant-based proofs so far.
-That abstraction will be \emph{traces}\index{traces}, sequences of outputs that a program might be observed to generate.
+That abstraction will be \emph{traces}\index{traces}, sequences of outputs (and termination events) that a program might be observed to generate.
 We define a command's trace set inductively.
 Recall that $\cdot$ is the empty list, while $\bowtie$ does list concatenation.
 $$\infer{\cdot \in \Tr{s}}{}
+\quad \infer{\terminate \in \Tr{(v, \skipe)}}{}
 \quad \infer{t \in \Tr{s}}{
  \smallstepcl{s}{\silent}{s'}
  & t \in \Tr{s'}
 }
-\quad \infer{\concat{n}{t} \in \Tr{s}}{
+\quad \infer{\concat{\outp{n}}{t} \in \Tr{s}}{
  \smallstepcl{s}{n}{s'}
  & t \in \Tr{s'}
 }$$
@ -2385,7 +2387,7 @@ Instead of showing that every step preserves a one-state predicate, we show that
 The simulation approach is as general for relating programs as the invariant approach is for verifying individual programs.

 \begin{theorem}
-  If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
+  \label{simulation_ok}If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
 \end{theorem}
 \begin{proof}
  We prove the two trace-inclusion directions separately.
@ -2405,7 +2407,7 @@ The simulation approach is as general for relating programs as the invariant app
 We can apply this very general principle to constant folding.

 \begin{theorem}
-  For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
+  \label{cfold_ok}For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
 \end{theorem}
 \begin{proof}
  By a simulation argument using this relation:
@ -2418,6 +2420,93 @@ We can apply this very general principle to constant folding.
 \end{proof}


+\section{Simulations That Allow Skipping Steps}
+
+\newcommand{\cfoldt}[1]{\mathsf{cfold}_2(#1)}
+
+Consider an evolution of our constant-folding optimization to take advantage of known values of $\mathsf{if}$ test expressions.
+Depending on whether the value is zero, we can replace the whole $\mathsf{if}$ with one of its two cases.
+We will write $\cfoldt{c}$ for this expanded optimization and work up to proving it sound, too.
+However, we can no longer use last section's definition of simulation!
+The reason is that optimizations intentionally cut down on steps that a program needs to execute.
+Some steps of the source program now have no matching steps of the target program, say when we are stepping an $\mathsf{if}$ whose test expression had a known value.
+
+Let's take a first crack at making simulation more flexible.
+
+\begin{definition}[Simulation relation with skipping (\emph{faulty} version!)]
+  We say that binary relation $R$ over states of our object language is a \emph{simulation relation with skipping} iff:
+  \begin{enumerate}
+    \item Whenever $(v_1, \skipe) \; R \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
+    \item Whenever $s_1 \; R \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
+      \begin{enumerate}
+        \item there exists $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R \; s'_2$,
+        \item or $\ell = \silent$ and $s'_1 \; R \; s_2$.
+      \end{enumerate}
+  \end{enumerate}
+\end{definition}
+
+In other words, to match a silent step, it suffices to do nothing, so long as $R$ still holds afterward.
+
+\newcommand{\addad}[1]{\mathsf{withAds}(#1)}
+
+We didn't mark the definition as \emph{faulty} for nothing.
+It actually does not imply trace equivalence.
+Consider a questionable ``optimization'' defined as $\addad{\while{1}{\skipe}} = \while{1}{\outp{0}}$, and $\addad{c} = c$ for all other $c$.
+It adds a little extra advertisement into a particular infinite loop.
+Now we define a candidate simulation relation.
+\begin{eqnarray*}
+  (v_1, c_1) \; R \; (v_2, c_2) &=& v_1 = v_2 \land c_1 \in \{\while{1}{\skipe}, (\skipe; \while{1}{\skipe})\}
+\end{eqnarray*}
+This suspicious relation records nothing about $c_2$.
+The $\skipe$ condition of simulations is handled trivially, as we can see by inspection that $R$ does not allow $c_1$ to be $\skipe$.
+Checking the execution-matching condition of simulations, $c_1$ is either $\while{1}{\skipe}$ or $(\skipe; \while{1}{\skipe})$, each of which steps silently to the other.
+We may match either step by keeping $c_2$ in place, as $R$ does not constrain $c_2$ at all.
+Thus, $R$ is a simulation relation with skipping, and, for $c = \while{1}{\skipe}$, it relates $c$ to $\addad{c}$.
+
+From here we expect to conclude trace equivalence.
+However, clearly $\mathsf{withAds}$ can turn a program that never outputs into a program that outputs infinitely often!
+
+Let's patch our definition.
+
+\begin{definition}[Simulation relation with skipping]
+  We say that an $\mathbb N$-indexed family of binary relations $R_n$ over states of our object language is a \emph{simulation relation with skipping} iff:
+  \begin{enumerate}
+    \item Whenever $(v_1, \skipe) \; R_n \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
+    \item Whenever $s_1 \; R_n \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
+      \begin{enumerate}
+        \item there exist $n'$ and $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R_{n'} \; s'_2$,
+        \item or $n > 0$, $\ell = \silent$, and $s'_1 \; R_{n-1} \; s_2$.
+      \end{enumerate}
+  \end{enumerate}
+\end{definition}
+
+This new version imposes a finite limit $n$ at any point, on how many times the righthand side may match lefthand steps without stepping itself.
+Our bad counterexample fails to satisfy the conditions, because eventually the starting step count $n$ will be used up, and the incorrect ``optimized'' program will be forced to reveal itself by taking a step that outputs.
+
+\begin{theorem}
+  If there exists a simulation with skipping $R$ such that $s_1 \; R_n \; s_2$, then $\treq{s_1}{s_2}$.
+\end{theorem}
+\begin{proof}
+  The proof is fairly similar to that of Theorem \ref{simulation_ok}.
+  To show termination preservation in the backward direction, we find ourselves proving a lemma by induction on $n$.
+\end{proof}
+
+\newcommand{\countIfs}[1]{\mathsf{countIfs}(#1)}
+
+\begin{theorem}
+  For any $v$ and $c$, $\treq{(v, c)}{(v, \cfoldt{c})}$.
+\end{theorem}
+\begin{proof}
+  By a simulation argument (with skipping) using this relation:
+  \begin{eqnarray*}
+    (v_1, c_1) \; R_n \; (v_2, c_2) &=& v_1 = v_2 \land c_2 = \cfoldt{c_1} \land \countIfs{c_1} < n
+  \end{eqnarray*}
+  We rely on a simple helper function $\countIfs{c}$ to count how many $\mathsf{If}$ nodes appear in the syntax of $c$.
+  This notion turns out to be a conservative upper bound on how many times in a row we will need to let lefthand steps go unmatched on the right.
+  The rest of the proof proceeds essentially the same way as in Theorem \ref{cfold_ok}.
+\end{proof}
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 \chapter{Lambda Calculus and Simple Type Safety}