From 023ec1ba762f3ce45a9c2cc9620b3a158d94bf72 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Wed, 14 Oct 2015 19:34:53 -0700 Subject: [PATCH] feat(library): add Meng&Paulson heuristic for selecting theorems --- src/frontends/lean/builtin_cmds.cpp | 72 +++++++++----- src/frontends/lean/token_table.cpp | 2 +- src/library/CMakeLists.txt | 2 +- src/library/init_module.cpp | 3 + src/library/meng_paulson.cpp | 140 +++++++++++++++++++++++++++ src/library/meng_paulson.h | 16 +++ src/util/sexpr/option_declarations.h | 1 + 7 files changed, 210 insertions(+), 26 deletions(-) create mode 100644 src/library/meng_paulson.cpp create mode 100644 src/library/meng_paulson.h diff --git a/src/frontends/lean/builtin_cmds.cpp b/src/frontends/lean/builtin_cmds.cpp index 0a8621437..8b3b5d665 100644 --- a/src/frontends/lean/builtin_cmds.cpp +++ b/src/frontends/lean/builtin_cmds.cpp @@ -38,6 +38,7 @@ Author: Leonardo de Moura #include "library/projection.h" #include "library/private.h" #include "library/decl_stats.h" +#include "library/meng_paulson.h" #include "library/definitional/projection.h" #include "library/simplifier/simp_rule_set.h" #include "compiler/preprocess_rec.h" @@ -1109,12 +1110,13 @@ static void display_name_set(parser & p, name const & n, name_set const & s) { static environment decl_stats_cmd(parser & p) { environment const & env = p.env(); - std::cout << "Use sets\n"; + io_state_stream out = p.regular_stream(); + out << "Use sets\n"; env.for_each_declaration([&](declaration const & d) { if ((d.is_theorem() || d.is_axiom()) && !is_private(env, d.get_name())) display_name_set(p, d.get_name(), get_use_set(env, d.get_name())); }); - std::cout << "Used-by sets\n"; + out << "Used-by sets\n"; env.for_each_declaration([&](declaration const & d) { if (!d.is_theorem() && !d.is_axiom() && !is_private(env, d.get_name())) display_name_set(p, d.get_name(), get_used_by_set(env, d.get_name())); @@ -1122,32 +1124,54 @@ static environment decl_stats_cmd(parser & p) { return env; } +static environment relevant_thms_cmd(parser & p) { + environment const & env = p.env(); + name_set R; + while (p.curr_is_identifier()) { + R.insert(p.check_constant_next("invalid #relevant_thms command, constant expected")); + } + name_set TS = get_relevant_thms(env, p.get_options(), R); + io_state_stream out = p.regular_stream(); + out << "{"; + bool first = true; + TS.for_each([&](name const & T) { + if (first) + first = false; + else + out << ", "; + out << T; + }); + out << "}"; + return env; +} + void init_cmd_table(cmd_table & r) { - add_cmd(r, cmd_info("open", "create aliases for declarations, and use objects defined in other namespaces", + add_cmd(r, cmd_info("open", "create aliases for declarations, and use objects defined in other namespaces", open_cmd)); - add_cmd(r, cmd_info("export", "create abbreviations for declarations, " + add_cmd(r, cmd_info("export", "create abbreviations for declarations, " "and export objects defined in other namespaces", export_cmd)); - add_cmd(r, cmd_info("override", "override notation declarations using the ones defined in the given namespace", + add_cmd(r, cmd_info("override", "override notation declarations using the ones defined in the given namespace", override_cmd)); - add_cmd(r, cmd_info("set_option", "set configuration option", set_option_cmd)); - add_cmd(r, cmd_info("exit", "exit", exit_cmd)); - add_cmd(r, cmd_info("print", "print a string", print_cmd)); - add_cmd(r, cmd_info("section", "open a new section", section_cmd)); - add_cmd(r, cmd_info("namespace", "open a new namespace", namespace_cmd)); - add_cmd(r, cmd_info("end", "close the current namespace/section", end_scoped_cmd)); - add_cmd(r, cmd_info("check", "type check given expression, and display its type", check_cmd)); - add_cmd(r, cmd_info("eval", "evaluate given expression", eval_cmd)); - add_cmd(r, cmd_info("find_decl", "find definitions and/or theorems", find_cmd)); - add_cmd(r, cmd_info("local", "define local attributes or notation", local_cmd)); - add_cmd(r, cmd_info("help", "brief description of available commands and options", help_cmd)); - add_cmd(r, cmd_info("init_quotient", "initialize quotient type computational rules", init_quotient_cmd)); - add_cmd(r, cmd_info("init_hits", "initialize builtin HITs", init_hits_cmd)); - add_cmd(r, cmd_info("#erase_cache", "erase cached definition (for debugging purposes)", erase_cache_cmd)); - add_cmd(r, cmd_info("#projections", "generate projections for inductive datatype (for debugging purposes)", projections_cmd)); - add_cmd(r, cmd_info("#telescope_eq", "(for debugging purposes)", telescope_eq_cmd)); - add_cmd(r, cmd_info("#compile", "(for debugging purposes)", compile_cmd)); - add_cmd(r, cmd_info("#accessible", "(for debugging purposes) display number of accessible declarations for blast tactic", accessible_cmd)); - add_cmd(r, cmd_info("#decl_stats", "(for debugging purposes) display declaration statistics", decl_stats_cmd)); + add_cmd(r, cmd_info("set_option", "set configuration option", set_option_cmd)); + add_cmd(r, cmd_info("exit", "exit", exit_cmd)); + add_cmd(r, cmd_info("print", "print a string", print_cmd)); + add_cmd(r, cmd_info("section", "open a new section", section_cmd)); + add_cmd(r, cmd_info("namespace", "open a new namespace", namespace_cmd)); + add_cmd(r, cmd_info("end", "close the current namespace/section", end_scoped_cmd)); + add_cmd(r, cmd_info("check", "type check given expression, and display its type", check_cmd)); + add_cmd(r, cmd_info("eval", "evaluate given expression", eval_cmd)); + add_cmd(r, cmd_info("find_decl", "find definitions and/or theorems", find_cmd)); + add_cmd(r, cmd_info("local", "define local attributes or notation", local_cmd)); + add_cmd(r, cmd_info("help", "brief description of available commands and options", help_cmd)); + add_cmd(r, cmd_info("init_quotient", "initialize quotient type computational rules", init_quotient_cmd)); + add_cmd(r, cmd_info("init_hits", "initialize builtin HITs", init_hits_cmd)); + add_cmd(r, cmd_info("#erase_cache", "erase cached definition (for debugging purposes)", erase_cache_cmd)); + add_cmd(r, cmd_info("#projections", "generate projections for inductive datatype (for debugging purposes)", projections_cmd)); + add_cmd(r, cmd_info("#telescope_eq", "(for debugging purposes)", telescope_eq_cmd)); + add_cmd(r, cmd_info("#compile", "(for debugging purposes)", compile_cmd)); + add_cmd(r, cmd_info("#accessible", "(for debugging purposes) display number of accessible declarations for blast tactic", accessible_cmd)); + add_cmd(r, cmd_info("#decl_stats", "(for debugging purposes) display declaration statistics", decl_stats_cmd)); + add_cmd(r, cmd_info("#relevant_thms", "(for debugging purposes) select relevant theorems using Meng&Paulson heuristic", relevant_thms_cmd)); register_decl_cmds(r); register_inductive_cmd(r); diff --git a/src/frontends/lean/token_table.cpp b/src/frontends/lean/token_table.cpp index a1d978e89..c9af94812 100644 --- a/src/frontends/lean/token_table.cpp +++ b/src/frontends/lean/token_table.cpp @@ -119,7 +119,7 @@ void init_token_table(token_table & t) { "add_begin_end_tactic", "set_begin_end_tactic", "instance", "class", "multiple_instances", "find_decl", "attribute", "persistent", "include", "omit", "migrate", "init_quotient", "init_hits", "#erase_cache", "#projections", "#telescope_eq", - "#compile", "#accessible", "#decl_stats", nullptr}; + "#compile", "#accessible", "#decl_stats", "#relevant_thms", nullptr}; pair aliases[] = {{g_lambda_unicode, "fun"}, {"forall", "Pi"}, {g_forall_unicode, "Pi"}, {g_pi_unicode, "Pi"}, diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 8b04fe2f7..17c6f8dfe 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -16,4 +16,4 @@ add_library(library OBJECT deep_copy.cpp expr_lt.cpp io_state.cpp occurs.cpp relation_manager.cpp export.cpp user_recursors.cpp class_instance_synth.cpp idx_metavar.cpp composition_manager.cpp tc_multigraph.cpp noncomputable.cpp aux_recursors.cpp norm_num.cpp - decl_stats.cpp) + decl_stats.cpp meng_paulson.cpp) diff --git a/src/library/init_module.cpp b/src/library/init_module.cpp index 9a2378f96..1162661d9 100644 --- a/src/library/init_module.cpp +++ b/src/library/init_module.cpp @@ -42,6 +42,7 @@ Author: Leonardo de Moura #include "library/noncomputable.h" #include "library/aux_recursors.h" #include "library/decl_stats.h" +#include "library/meng_paulson.h" namespace lean { void initialize_library_module() { @@ -83,9 +84,11 @@ void initialize_library_module() { initialize_noncomputable(); initialize_aux_recursors(); initialize_decl_stats(); + initialize_meng_paulson(); } void finalize_library_module() { + finalize_meng_paulson(); finalize_decl_stats(); finalize_aux_recursors(); finalize_noncomputable(); diff --git a/src/library/meng_paulson.cpp b/src/library/meng_paulson.cpp new file mode 100644 index 000000000..0ed8254cb --- /dev/null +++ b/src/library/meng_paulson.cpp @@ -0,0 +1,140 @@ +/* +Copyright (c) 2015 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura + +This module implements a heuristic for selecting relevant theorems based on +the approach described at + + "Lightweight relevance filtering for machine-generated resolution problems" + Jia Meng and Larry Paulson + Journal of Applied Logic 7 2009 +*/ +#include +#include "util/sexpr/option_declarations.h" +#include "kernel/environment.h" +#include "library/decl_stats.h" +#include "library/private.h" + +#ifndef LEAN_DEFAULT_MENG_PAULSON_P +#define LEAN_DEFAULT_MENG_PAULSON_P 0.6 +#endif + +#ifndef LEAN_DEFAULT_MENG_PAULSON_C +#define LEAN_DEFAULT_MENG_PAULSON_C 2.4 +#endif + +namespace lean { +static name * g_meng_paulson_p = nullptr; +static name * g_meng_paulson_c = nullptr; + +void initialize_meng_paulson() { + g_meng_paulson_p = new name{"meng_paulson", "p"}; + g_meng_paulson_c = new name{"meng_paulson", "c"}; + + register_double_option(*g_meng_paulson_p, LEAN_DEFAULT_MENG_PAULSON_P, + "(theorem selection) control parameter for the Meng&Paulson theorem selection heuristic" + "(for details see paper \"Lightweight relevance filtering for machine-generated resolution problems)\""); + register_double_option(*g_meng_paulson_c, LEAN_DEFAULT_MENG_PAULSON_C, + "(theorem selection) control parameter for the Meng&Paulson theorem selection heuristic" + "(for details see paper \"Lightweight relevance filtering for machine-generated resolution problems)\""); +} + +void finalize_meng_paulson() { + delete g_meng_paulson_p; + delete g_meng_paulson_c; +} + +double get_meng_paulson_p(options const & o) { + return o.get_double(*g_meng_paulson_p, LEAN_DEFAULT_MENG_PAULSON_P); +} + +double get_meng_paulson_c(options const & o) { + return o.get_double(*g_meng_paulson_c, LEAN_DEFAULT_MENG_PAULSON_C); +} + +class relevant_thms_fn { + environment m_env; + double m_p; + double m_c; + name_set m_relevant; + + double get_weight(name const & n) const { + double r = get_num_occs(m_env, n); + return 1.0 + 2.0 / log(r + 1.0); + } + + double get_thm_score(name const & n) const { + name_set s = get_use_set(m_env, n); + unsigned IR = 0; + double M = 0.0; + s.for_each([&](name const & F) { + if (m_relevant.contains(F)) { + M += get_weight(F); + } else { + IR++; + } + }); + if (M > 0.0) + return M / (M + IR); + else + return 0.0; + } +public: + relevant_thms_fn(environment const & env, double p, double c, name_set const & rel): + m_env(env), m_p(p), m_c(c), m_relevant(rel) { + lean_assert(c > 0.0); + } + + name_set operator()() { + name_set A; + while (true) { + name_set Rel; + m_relevant.for_each([&](name const & c) { + name_set used_by = get_used_by_set(m_env, c); + used_by.for_each([&](name const & T) { + declaration const & T_decl = m_env.get(T); + if (A.contains(T)) + return; // T is already in the result set + if (!T_decl.is_theorem() && !T_decl.is_axiom()) + return; // we only care about axioms and theorems + if (is_private(m_env, T)) + return; // we ignore private decls + double M = get_thm_score(T); + if (M < m_p) + return; // score is to low + Rel.insert(T); + A.insert(T); + }); + + }); + if (Rel.empty()) + break; + // include symbols of new theorems in m_relevant + Rel.for_each([&](name const & T) { + name_set uses = get_use_set(m_env, T); + uses.for_each([&](name const & c) { + declaration const & c_decl = m_env.get(c); + if (c_decl.is_theorem() || c_decl.is_axiom()) + return; // we ignore theorems occurring in types + if (is_private(m_env, c)) + return; // we ignore private decls + m_relevant.insert(c); + }); + }); + m_p = m_p + (1.0 - m_p) / m_c; + } + return A; + } +}; + +name_set get_relevant_thms(environment const & env, double p, double c, + name_set const & relevant_symbols) { + return relevant_thms_fn(env, p, c, relevant_symbols)(); +} + +name_set get_relevant_thms(environment const & env, options const & o, name_set const & relevant_symbols) { + return relevant_thms_fn(env, get_meng_paulson_p(o), get_meng_paulson_c(o), relevant_symbols)(); +} +} diff --git a/src/library/meng_paulson.h b/src/library/meng_paulson.h new file mode 100644 index 000000000..adc19e33a --- /dev/null +++ b/src/library/meng_paulson.h @@ -0,0 +1,16 @@ +/* +Copyright (c) 2015 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. + +Author: Leonardo de Moura +*/ +#pragma once +#include "util/name_set.h" +#include "kernel/environment.h" + +namespace lean { +name_set get_relevant_thms(environment const & env, double p, double c, name_set const & relevant_symbols); +name_set get_relevant_thms(environment const & env, options const & o, name_set const & relevant_symbols); +void initialize_meng_paulson(); +void finalize_meng_paulson(); +} diff --git a/src/util/sexpr/option_declarations.h b/src/util/sexpr/option_declarations.h index 1c2367c63..a45c42839 100644 --- a/src/util/sexpr/option_declarations.h +++ b/src/util/sexpr/option_declarations.h @@ -39,4 +39,5 @@ option_declarations const & get_option_declarations(); void register_option(name const & n, option_kind k, char const * default_value, char const * description); #define register_bool_option(n, v, d) register_option(n, BoolOption, LEAN_STR(v), d) #define register_unsigned_option(n, v, d) register_option(n, UnsignedOption, LEAN_STR(v), d) +#define register_double_option(n, v, d) register_option(n, DoubleOption, LEAN_STR(v), d) }