feat(library): add Meng&Paulson heuristic for selecting theorems

This commit is contained in:
Leonardo de Moura 2015-10-14 19:34:53 -07:00
parent d64b410bbd
commit 023ec1ba76
7 changed files with 210 additions and 26 deletions

View file

@ -38,6 +38,7 @@ Author: Leonardo de Moura
#include "library/projection.h"
#include "library/private.h"
#include "library/decl_stats.h"
#include "library/meng_paulson.h"
#include "library/definitional/projection.h"
#include "library/simplifier/simp_rule_set.h"
#include "compiler/preprocess_rec.h"
@ -1109,12 +1110,13 @@ static void display_name_set(parser & p, name const & n, name_set const & s) {
static environment decl_stats_cmd(parser & p) {
environment const & env = p.env();
std::cout << "Use sets\n";
io_state_stream out = p.regular_stream();
out << "Use sets\n";
env.for_each_declaration([&](declaration const & d) {
if ((d.is_theorem() || d.is_axiom()) && !is_private(env, d.get_name()))
display_name_set(p, d.get_name(), get_use_set(env, d.get_name()));
});
std::cout << "Used-by sets\n";
out << "Used-by sets\n";
env.for_each_declaration([&](declaration const & d) {
if (!d.is_theorem() && !d.is_axiom() && !is_private(env, d.get_name()))
display_name_set(p, d.get_name(), get_used_by_set(env, d.get_name()));
@ -1122,32 +1124,54 @@ static environment decl_stats_cmd(parser & p) {
return env;
}
static environment relevant_thms_cmd(parser & p) {
environment const & env = p.env();
name_set R;
while (p.curr_is_identifier()) {
R.insert(p.check_constant_next("invalid #relevant_thms command, constant expected"));
}
name_set TS = get_relevant_thms(env, p.get_options(), R);
io_state_stream out = p.regular_stream();
out << "{";
bool first = true;
TS.for_each([&](name const & T) {
if (first)
first = false;
else
out << ", ";
out << T;
});
out << "}";
return env;
}
void init_cmd_table(cmd_table & r) {
add_cmd(r, cmd_info("open", "create aliases for declarations, and use objects defined in other namespaces",
add_cmd(r, cmd_info("open", "create aliases for declarations, and use objects defined in other namespaces",
open_cmd));
add_cmd(r, cmd_info("export", "create abbreviations for declarations, "
add_cmd(r, cmd_info("export", "create abbreviations for declarations, "
"and export objects defined in other namespaces", export_cmd));
add_cmd(r, cmd_info("override", "override notation declarations using the ones defined in the given namespace",
add_cmd(r, cmd_info("override", "override notation declarations using the ones defined in the given namespace",
override_cmd));
add_cmd(r, cmd_info("set_option", "set configuration option", set_option_cmd));
add_cmd(r, cmd_info("exit", "exit", exit_cmd));
add_cmd(r, cmd_info("print", "print a string", print_cmd));
add_cmd(r, cmd_info("section", "open a new section", section_cmd));
add_cmd(r, cmd_info("namespace", "open a new namespace", namespace_cmd));
add_cmd(r, cmd_info("end", "close the current namespace/section", end_scoped_cmd));
add_cmd(r, cmd_info("check", "type check given expression, and display its type", check_cmd));
add_cmd(r, cmd_info("eval", "evaluate given expression", eval_cmd));
add_cmd(r, cmd_info("find_decl", "find definitions and/or theorems", find_cmd));
add_cmd(r, cmd_info("local", "define local attributes or notation", local_cmd));
add_cmd(r, cmd_info("help", "brief description of available commands and options", help_cmd));
add_cmd(r, cmd_info("init_quotient", "initialize quotient type computational rules", init_quotient_cmd));
add_cmd(r, cmd_info("init_hits", "initialize builtin HITs", init_hits_cmd));
add_cmd(r, cmd_info("#erase_cache", "erase cached definition (for debugging purposes)", erase_cache_cmd));
add_cmd(r, cmd_info("#projections", "generate projections for inductive datatype (for debugging purposes)", projections_cmd));
add_cmd(r, cmd_info("#telescope_eq", "(for debugging purposes)", telescope_eq_cmd));
add_cmd(r, cmd_info("#compile", "(for debugging purposes)", compile_cmd));
add_cmd(r, cmd_info("#accessible", "(for debugging purposes) display number of accessible declarations for blast tactic", accessible_cmd));
add_cmd(r, cmd_info("#decl_stats", "(for debugging purposes) display declaration statistics", decl_stats_cmd));
add_cmd(r, cmd_info("set_option", "set configuration option", set_option_cmd));
add_cmd(r, cmd_info("exit", "exit", exit_cmd));
add_cmd(r, cmd_info("print", "print a string", print_cmd));
add_cmd(r, cmd_info("section", "open a new section", section_cmd));
add_cmd(r, cmd_info("namespace", "open a new namespace", namespace_cmd));
add_cmd(r, cmd_info("end", "close the current namespace/section", end_scoped_cmd));
add_cmd(r, cmd_info("check", "type check given expression, and display its type", check_cmd));
add_cmd(r, cmd_info("eval", "evaluate given expression", eval_cmd));
add_cmd(r, cmd_info("find_decl", "find definitions and/or theorems", find_cmd));
add_cmd(r, cmd_info("local", "define local attributes or notation", local_cmd));
add_cmd(r, cmd_info("help", "brief description of available commands and options", help_cmd));
add_cmd(r, cmd_info("init_quotient", "initialize quotient type computational rules", init_quotient_cmd));
add_cmd(r, cmd_info("init_hits", "initialize builtin HITs", init_hits_cmd));
add_cmd(r, cmd_info("#erase_cache", "erase cached definition (for debugging purposes)", erase_cache_cmd));
add_cmd(r, cmd_info("#projections", "generate projections for inductive datatype (for debugging purposes)", projections_cmd));
add_cmd(r, cmd_info("#telescope_eq", "(for debugging purposes)", telescope_eq_cmd));
add_cmd(r, cmd_info("#compile", "(for debugging purposes)", compile_cmd));
add_cmd(r, cmd_info("#accessible", "(for debugging purposes) display number of accessible declarations for blast tactic", accessible_cmd));
add_cmd(r, cmd_info("#decl_stats", "(for debugging purposes) display declaration statistics", decl_stats_cmd));
add_cmd(r, cmd_info("#relevant_thms", "(for debugging purposes) select relevant theorems using Meng&Paulson heuristic", relevant_thms_cmd));
register_decl_cmds(r);
register_inductive_cmd(r);

View file

@ -119,7 +119,7 @@ void init_token_table(token_table & t) {
"add_begin_end_tactic", "set_begin_end_tactic", "instance", "class",
"multiple_instances", "find_decl", "attribute", "persistent",
"include", "omit", "migrate", "init_quotient", "init_hits", "#erase_cache", "#projections", "#telescope_eq",
"#compile", "#accessible", "#decl_stats", nullptr};
"#compile", "#accessible", "#decl_stats", "#relevant_thms", nullptr};
pair<char const *, char const *> aliases[] =
{{g_lambda_unicode, "fun"}, {"forall", "Pi"}, {g_forall_unicode, "Pi"}, {g_pi_unicode, "Pi"},

View file

@ -16,4 +16,4 @@ add_library(library OBJECT deep_copy.cpp expr_lt.cpp io_state.cpp occurs.cpp
relation_manager.cpp export.cpp user_recursors.cpp
class_instance_synth.cpp idx_metavar.cpp composition_manager.cpp
tc_multigraph.cpp noncomputable.cpp aux_recursors.cpp norm_num.cpp
decl_stats.cpp)
decl_stats.cpp meng_paulson.cpp)

View file

@ -42,6 +42,7 @@ Author: Leonardo de Moura
#include "library/noncomputable.h"
#include "library/aux_recursors.h"
#include "library/decl_stats.h"
#include "library/meng_paulson.h"
namespace lean {
void initialize_library_module() {
@ -83,9 +84,11 @@ void initialize_library_module() {
initialize_noncomputable();
initialize_aux_recursors();
initialize_decl_stats();
initialize_meng_paulson();
}
void finalize_library_module() {
finalize_meng_paulson();
finalize_decl_stats();
finalize_aux_recursors();
finalize_noncomputable();

View file

@ -0,0 +1,140 @@
/*
Copyright (c) 2015 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
This module implements a heuristic for selecting relevant theorems based on
the approach described at
"Lightweight relevance filtering for machine-generated resolution problems"
Jia Meng and Larry Paulson
Journal of Applied Logic 7 2009
*/
#include <math.h>
#include "util/sexpr/option_declarations.h"
#include "kernel/environment.h"
#include "library/decl_stats.h"
#include "library/private.h"
#ifndef LEAN_DEFAULT_MENG_PAULSON_P
#define LEAN_DEFAULT_MENG_PAULSON_P 0.6
#endif
#ifndef LEAN_DEFAULT_MENG_PAULSON_C
#define LEAN_DEFAULT_MENG_PAULSON_C 2.4
#endif
namespace lean {
static name * g_meng_paulson_p = nullptr;
static name * g_meng_paulson_c = nullptr;
void initialize_meng_paulson() {
g_meng_paulson_p = new name{"meng_paulson", "p"};
g_meng_paulson_c = new name{"meng_paulson", "c"};
register_double_option(*g_meng_paulson_p, LEAN_DEFAULT_MENG_PAULSON_P,
"(theorem selection) control parameter for the Meng&Paulson theorem selection heuristic"
"(for details see paper \"Lightweight relevance filtering for machine-generated resolution problems)\"");
register_double_option(*g_meng_paulson_c, LEAN_DEFAULT_MENG_PAULSON_C,
"(theorem selection) control parameter for the Meng&Paulson theorem selection heuristic"
"(for details see paper \"Lightweight relevance filtering for machine-generated resolution problems)\"");
}
void finalize_meng_paulson() {
delete g_meng_paulson_p;
delete g_meng_paulson_c;
}
double get_meng_paulson_p(options const & o) {
return o.get_double(*g_meng_paulson_p, LEAN_DEFAULT_MENG_PAULSON_P);
}
double get_meng_paulson_c(options const & o) {
return o.get_double(*g_meng_paulson_c, LEAN_DEFAULT_MENG_PAULSON_C);
}
class relevant_thms_fn {
environment m_env;
double m_p;
double m_c;
name_set m_relevant;
double get_weight(name const & n) const {
double r = get_num_occs(m_env, n);
return 1.0 + 2.0 / log(r + 1.0);
}
double get_thm_score(name const & n) const {
name_set s = get_use_set(m_env, n);
unsigned IR = 0;
double M = 0.0;
s.for_each([&](name const & F) {
if (m_relevant.contains(F)) {
M += get_weight(F);
} else {
IR++;
}
});
if (M > 0.0)
return M / (M + IR);
else
return 0.0;
}
public:
relevant_thms_fn(environment const & env, double p, double c, name_set const & rel):
m_env(env), m_p(p), m_c(c), m_relevant(rel) {
lean_assert(c > 0.0);
}
name_set operator()() {
name_set A;
while (true) {
name_set Rel;
m_relevant.for_each([&](name const & c) {
name_set used_by = get_used_by_set(m_env, c);
used_by.for_each([&](name const & T) {
declaration const & T_decl = m_env.get(T);
if (A.contains(T))
return; // T is already in the result set
if (!T_decl.is_theorem() && !T_decl.is_axiom())
return; // we only care about axioms and theorems
if (is_private(m_env, T))
return; // we ignore private decls
double M = get_thm_score(T);
if (M < m_p)
return; // score is to low
Rel.insert(T);
A.insert(T);
});
});
if (Rel.empty())
break;
// include symbols of new theorems in m_relevant
Rel.for_each([&](name const & T) {
name_set uses = get_use_set(m_env, T);
uses.for_each([&](name const & c) {
declaration const & c_decl = m_env.get(c);
if (c_decl.is_theorem() || c_decl.is_axiom())
return; // we ignore theorems occurring in types
if (is_private(m_env, c))
return; // we ignore private decls
m_relevant.insert(c);
});
});
m_p = m_p + (1.0 - m_p) / m_c;
}
return A;
}
};
name_set get_relevant_thms(environment const & env, double p, double c,
name_set const & relevant_symbols) {
return relevant_thms_fn(env, p, c, relevant_symbols)();
}
name_set get_relevant_thms(environment const & env, options const & o, name_set const & relevant_symbols) {
return relevant_thms_fn(env, get_meng_paulson_p(o), get_meng_paulson_c(o), relevant_symbols)();
}
}

View file

@ -0,0 +1,16 @@
/*
Copyright (c) 2015 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#pragma once
#include "util/name_set.h"
#include "kernel/environment.h"
namespace lean {
name_set get_relevant_thms(environment const & env, double p, double c, name_set const & relevant_symbols);
name_set get_relevant_thms(environment const & env, options const & o, name_set const & relevant_symbols);
void initialize_meng_paulson();
void finalize_meng_paulson();
}

View file

@ -39,4 +39,5 @@ option_declarations const & get_option_declarations();
void register_option(name const & n, option_kind k, char const * default_value, char const * description);
#define register_bool_option(n, v, d) register_option(n, BoolOption, LEAN_STR(v), d)
#define register_unsigned_option(n, v, d) register_option(n, UnsignedOption, LEAN_STR(v), d)
#define register_double_option(n, v, d) register_option(n, DoubleOption, LEAN_STR(v), d)
}