CompilerCorrectness chapter: simulation with skipping, after adding termination as an observable

This commit is contained in:
Adam Chlipala 2017-03-19 18:07:21 -04:00
parent 9974e130f0
commit 6c1af44f95
2 changed files with 221 additions and 11 deletions

View file

@ -109,10 +109,14 @@ Inductive cstep : valuation * cmd -> option nat -> valuation * cmd -> Prop :=
(* To characterize correct compilation, it is helpful to define a relation to
* capture which output _traces_ a command might generate. Note that, for us, a
* trace is a list of output values, where [None] labels are simply dropped. *)
Inductive generate : valuation * cmd -> list nat -> Prop :=
* trace is a list of output values and/or terminating markers. We drop silent
* labels as we run, and we use [Some n] for outputting of [n] and [None] for
* termination. *)
Inductive generate : valuation * cmd -> list (option nat) -> Prop :=
| GenDone : forall vc,
generate vc []
| GenSkip : forall v,
generate (v, Skip) [None]
| GenSilent : forall vc vc' ns,
cstep vc None vc'
-> generate vc' ns
@ -120,7 +124,7 @@ Inductive generate : valuation * cmd -> list nat -> Prop :=
| GenOutput : forall vc n vc' ns,
cstep vc (Some n) vc'
-> generate vc' ns
-> generate vc (n :: ns).
-> generate vc (Some n :: ns).
Hint Constructors plug step0 cstep generate.
@ -178,7 +182,7 @@ Hint Extern 1 (interp _ _ = _) => simplify; congruence.
Hint Extern 1 (interp _ _ <> _) => simplify; congruence.
Theorem first_few_values :
generate ($0, month_boundaries_in_days) [28; 56].
generate ($0, month_boundaries_in_days) [Some 28; Some 56].
Proof.
unfold month_boundaries_in_days.
eapply GenSilent.
@ -385,6 +389,10 @@ Section simulation.
Proof.
induct 1; simplify; eauto.
cases vc2.
apply agree_on_termination in H; subst.
auto.
eapply one_step in H; eauto.
first_order.
eauto.
@ -409,6 +417,16 @@ Section simulation.
Proof.
induct 1; simplify; eauto.
cases vc1.
assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
first_order; subst.
auto.
eapply one_step in H; eauto.
first_order.
invert H.
invert H4.
invert H5.
cases vc1; cases vc.
assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
first_order; subst.
@ -538,6 +556,10 @@ Proof.
induct 1; simplify; eauto.
invert H1; auto.
invert H.
invert H3.
invert H4.
eapply deterministic in H; eauto.
propositional; subst.
auto.
@ -555,7 +577,7 @@ Proof.
Qed.
Lemma generate_Skip : forall v a ns,
generate (v, Skip) (a :: ns)
generate (v, Skip) (Some a :: ns)
-> False.
Proof.
induct 1; simplify.
@ -605,6 +627,11 @@ Section simulation_skipping.
Proof.
induct 1; simplify; eauto.
cases vc2.
apply agree_on_termination in H.
subst.
auto.
eapply one_step in H; eauto.
first_order.
eauto.
@ -664,12 +691,57 @@ Section simulation_skipping.
eauto 6.
Qed.
Lemma step_to_termination : forall vc v,
silent_cstep^* vc (v, Skip)
-> generate vc [None].
Proof.
clear; induct 1; eauto.
Qed.
Hint Resolve step_to_termination.
Lemma R_Skip : forall n vc1 v,
R n vc1 (v, Skip)
-> exists v', silent_cstep^* vc1 (v', Skip).
Proof.
induct n; simplify.
cases vc1.
assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
first_order; subst.
eauto.
eapply one_step in H; eauto.
first_order.
equality.
invert H.
invert H4.
invert H5.
cases vc1.
assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
first_order; subst.
eauto.
eapply one_step in H; eauto.
first_order; subst.
invert H.
apply IHn in H2.
first_order.
eauto.
invert H.
invert H4.
invert H5.
Qed.
Lemma simulation_skipping_bwd' : forall ns vc2, generate vc2 ns
-> forall n vc1, R n vc1 vc2
-> generate vc1 ns.
Proof.
induct 1; simplify; eauto.
cases vc1.
apply R_Skip in H; first_order.
eauto.
eapply match_step in H1; eauto.
first_order.
eauto.
@ -768,6 +840,15 @@ Qed.
Hint Resolve plug_countIfs.
Hint Extern 1 (interp ?e _ = _) =>
match goal with
| [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
end.
Hint Extern 1 (interp ?e _ <> _) =>
match goal with
| [ H : cfoldArith e = _ |- _ ] => rewrite <- cfoldArith_ok; rewrite H
end.
(* The final proof is fairly straightforward now. *)
Lemma cfold_ok : forall v c,
(v, c) =| (v, cfold c).
@ -996,6 +1077,10 @@ Section simulation_multiple.
Proof.
induct 1; simplify; eauto.
cases vc2.
apply agree_on_termination in H; subst.
auto.
eapply one_step in H; eauto.
first_order.
eauto.
@ -1014,9 +1099,11 @@ Section simulation_multiple.
(* The backward proof essentially proceeds by strong induction on
* _how many steps it took to generate a trace_, which we facilitate by
* defining a [generate] variant parameterized by a step count. *)
Inductive generateN : nat -> valuation * cmd -> list nat -> Prop :=
Inductive generateN : nat -> valuation * cmd -> list (option nat) -> Prop :=
| GenDoneN : forall vc,
generateN 0 vc []
| GenSkupN : forall v,
generateN 0 (v, Skip) [None]
| GenSilentN : forall sc vc vc' ns,
cstep vc None vc'
-> generateN sc vc' ns
@ -1024,7 +1111,7 @@ Section simulation_multiple.
| GenOutputN : forall sc vc n vc' ns,
cstep vc (Some n) vc'
-> generateN sc vc' ns
-> generateN (S sc) vc (n :: ns).
-> generateN (S sc) vc (Some n :: ns).
(* We won't comment on the other proof details, though they could be
* interesting reading. *)
@ -1054,6 +1141,11 @@ Section simulation_multiple.
Proof.
clear; induct 1; simplify; eauto.
invert H; eauto.
invert H0.
invert H3.
invert H4.
invert H1; eauto.
eapply deterministic in H; eauto.
propositional; subst.
@ -1066,6 +1158,13 @@ Section simulation_multiple.
equality.
Qed.
Lemma termination_is_last : forall sc vc ns,
generateN sc vc (None :: ns)
-> ns = [].
Proof.
induct 1; eauto.
Qed.
Lemma simulation_multiple_bwd' : forall sc sc', sc' < sc
-> forall vc2 ns, generateN sc' vc2 ns
-> forall vc1, R vc1 vc2
@ -1078,17 +1177,39 @@ Section simulation_multiple.
cases sc'.
invert H0.
auto.
cases vc1.
assert (c = Skip \/ exists v' l c', cstep (v0, c) l (v', c')) by apply skip_or_step.
first_order; subst.
auto.
eapply one_step in H1; eauto.
first_order.
invert H1.
invert H2.
invert H5.
invert H6.
invert H4.
invert H7.
invert H8.
cases vc1; cases vc2.
assert (c = Skip \/ exists v' l c', cstep (v, c) l (v', c')) by apply skip_or_step.
first_order; subst.
apply agree_on_termination in H1; subst.
cases ns; auto.
cases o.
exfalso; eauto.
eapply termination_is_last in H0; subst.
auto.
eapply one_step in H1; eauto.
first_order.
eapply generateN_silent_cstep in H0; eauto.
first_order.
invert H5; auto.
invert H3.
invert H7.
invert H8.
eapply deterministic in H3; eauto.
propositional; subst.
econstructor.

View file

@ -2311,17 +2311,19 @@ $$\infer{\smallstepcl{(v, C[c])}{\ell}{(v', C[c'])}}{
}$$
\newcommand{\Tr}[1]{\mathsf{Tr}(#1)}
\newcommand{\terminate}[0]{\mathsf{terminate}}
To reason about infinite executions, we need a new abstraction, compared to what has worked in our invariant-based proofs so far.
That abstraction will be \emph{traces}\index{traces}, sequences of outputs that a program might be observed to generate.
That abstraction will be \emph{traces}\index{traces}, sequences of outputs (and termination events) that a program might be observed to generate.
We define a command's trace set inductively.
Recall that $\cdot$ is the empty list, while $\bowtie$ does list concatenation.
$$\infer{\cdot \in \Tr{s}}{}
\quad \infer{\terminate \in \Tr{(v, \skipe)}}{}
\quad \infer{t \in \Tr{s}}{
\smallstepcl{s}{\silent}{s'}
& t \in \Tr{s'}
}
\quad \infer{\concat{n}{t} \in \Tr{s}}{
\quad \infer{\concat{\outp{n}}{t} \in \Tr{s}}{
\smallstepcl{s}{n}{s'}
& t \in \Tr{s'}
}$$
@ -2385,7 +2387,7 @@ Instead of showing that every step preserves a one-state predicate, we show that
The simulation approach is as general for relating programs as the invariant approach is for verifying individual programs.
\begin{theorem}
If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
\label{simulation_ok}If there exists a simulation $R$ such that $s_1 \; R \; s_2$, then $\treq{s_1}{s_2}$.
\end{theorem}
\begin{proof}
We prove the two trace-inclusion directions separately.
@ -2405,7 +2407,7 @@ The simulation approach is as general for relating programs as the invariant app
We can apply this very general principle to constant folding.
\begin{theorem}
For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
\label{cfold_ok}For any $v$ and $c$, $\treq{(v, c)}{(v, \cfold{c})}$.
\end{theorem}
\begin{proof}
By a simulation argument using this relation:
@ -2418,6 +2420,93 @@ We can apply this very general principle to constant folding.
\end{proof}
\section{Simulations That Allow Skipping Steps}
\newcommand{\cfoldt}[1]{\mathsf{cfold}_2(#1)}
Consider an evolution of our constant-folding optimization to take advantage of known values of $\mathsf{if}$ test expressions.
Depending on whether the value is zero, we can replace the whole $\mathsf{if}$ with one of its two cases.
We will write $\cfoldt{c}$ for this expanded optimization and work up to proving it sound, too.
However, we can no longer use last section's definition of simulation!
The reason is that optimizations intentionally cut down on steps that a program needs to execute.
Some steps of the source program now have no matching steps of the target program, say when we are stepping an $\mathsf{if}$ whose test expression had a known value.
Let's take a first crack at making simulation more flexible.
\begin{definition}[Simulation relation with skipping (\emph{faulty} version!)]
We say that binary relation $R$ over states of our object language is a \emph{simulation relation with skipping} iff:
\begin{enumerate}
\item Whenever $(v_1, \skipe) \; R \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
\item Whenever $s_1 \; R \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
\begin{enumerate}
\item there exists $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R \; s'_2$,
\item or $\ell = \silent$ and $s'_1 \; R \; s_2$.
\end{enumerate}
\end{enumerate}
\end{definition}
In other words, to match a silent step, it suffices to do nothing, so long as $R$ still holds afterward.
\newcommand{\addad}[1]{\mathsf{withAds}(#1)}
We didn't mark the definition as \emph{faulty} for nothing.
It actually does not imply trace equivalence.
Consider a questionable ``optimization'' defined as $\addad{\while{1}{\skipe}} = \while{1}{\outp{0}}$, and $\addad{c} = c$ for all other $c$.
It adds a little extra advertisement into a particular infinite loop.
Now we define a candidate simulation relation.
\begin{eqnarray*}
(v_1, c_1) \; R \; (v_2, c_2) &=& v_1 = v_2 \land c_1 \in \{\while{1}{\skipe}, (\skipe; \while{1}{\skipe})\}
\end{eqnarray*}
This suspicious relation records nothing about $c_2$.
The $\skipe$ condition of simulations is handled trivially, as we can see by inspection that $R$ does not allow $c_1$ to be $\skipe$.
Checking the execution-matching condition of simulations, $c_1$ is either $\while{1}{\skipe}$ or $(\skipe; \while{1}{\skipe})$, each of which steps silently to the other.
We may match either step by keeping $c_2$ in place, as $R$ does not constrain $c_2$ at all.
Thus, $R$ is a simulation relation with skipping, and, for $c = \while{1}{\skipe}$, it relates $c$ to $\addad{c}$.
From here we expect to conclude trace equivalence.
However, clearly $\mathsf{withAds}$ can turn a program that never outputs into a program that outputs infinitely often!
Let's patch our definition.
\begin{definition}[Simulation relation with skipping]
We say that an $\mathbb N$-indexed family of binary relations $R_n$ over states of our object language is a \emph{simulation relation with skipping} iff:
\begin{enumerate}
\item Whenever $(v_1, \skipe) \; R_n \; (v_2, c_2)$, it follows that $c_2 = \skipe$.
\item Whenever $s_1 \; R_n \; s_2$ and $\smallstepcl{s_1}{\ell}{s'_1}$, then either:
\begin{enumerate}
\item there exist $n'$ and $s'_2$ such that $\smallstepcl{s_2}{\ell}{s'_2}$ and $s'_1 \; R_{n'} \; s'_2$,
\item or $n > 0$, $\ell = \silent$, and $s'_1 \; R_{n-1} \; s_2$.
\end{enumerate}
\end{enumerate}
\end{definition}
This new version imposes a finite limit $n$ at any point, on how many times the righthand side may match lefthand steps without stepping itself.
Our bad counterexample fails to satisfy the conditions, because eventually the starting step count $n$ will be used up, and the incorrect ``optimized'' program will be forced to reveal itself by taking a step that outputs.
\begin{theorem}
If there exists a simulation with skipping $R$ such that $s_1 \; R_n \; s_2$, then $\treq{s_1}{s_2}$.
\end{theorem}
\begin{proof}
The proof is fairly similar to that of Theorem \ref{simulation_ok}.
To show termination preservation in the backward direction, we find ourselves proving a lemma by induction on $n$.
\end{proof}
\newcommand{\countIfs}[1]{\mathsf{countIfs}(#1)}
\begin{theorem}
For any $v$ and $c$, $\treq{(v, c)}{(v, \cfoldt{c})}$.
\end{theorem}
\begin{proof}
By a simulation argument (with skipping) using this relation:
\begin{eqnarray*}
(v_1, c_1) \; R_n \; (v_2, c_2) &=& v_1 = v_2 \land c_2 = \cfoldt{c_1} \land \countIfs{c_1} < n
\end{eqnarray*}
We rely on a simple helper function $\countIfs{c}$ to count how many $\mathsf{If}$ nodes appear in the syntax of $c$.
This notion turns out to be a conservative upper bound on how many times in a row we will need to let lefthand steps go unmatched on the right.
The rest of the proof proceeds essentially the same way as in Theorem \ref{cfold_ok}.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{Lambda Calculus and Simple Type Safety}