feat(kernel/expr): add hash code based on allocation time

The new hash code has the property that given expr_cell * c1 and expr_cell * c2,
if c1 != c2 then there is a high propbability that c1->hash_alloc() != c2->hash_alloc().

The structural hash code hash() does not have this property because we may have
c1 != c2, but c1 and c2 are structurally equal.

The new hash code is only compatible with pointer equality.
By compatible we mean, if c1 == c2, then c1->hash_alloc() == c2->hash_alloc().
This property is obvious because hash_alloc() does not have side-effects.

The test tests/lua/big.lua exposes the problem fixed by this commit.

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
This commit is contained in:
Leonardo de Moura 2013-11-14 02:31:54 -08:00
parent ae7ea99b56
commit 691893258d
12 changed files with 62 additions and 17 deletions

View file

@ -149,9 +149,9 @@ expr replace_var_with_name(expr const & a, name const & n) {
/** \brief Functional object for pretty printing expressions */
class pp_fn {
typedef scoped_map<expr, name, expr_hash, expr_eqp> aliases;
typedef std::vector<std::pair<name, format>> aliases_defs;
typedef scoped_set<name, name_hash, name_eq> local_names;
typedef scoped_map<expr, name, expr_hash_alloc, expr_eqp> aliases;
typedef std::vector<std::pair<name, format>> aliases_defs;
typedef scoped_set<name, name_hash, name_eq> local_names;
frontend const & m_frontend;
// State
aliases m_aliases;

View file

@ -41,7 +41,17 @@ expr_cell::expr_cell(expr_kind k, unsigned h, bool has_mv):
m_kind(static_cast<unsigned>(k)),
m_flags(has_mv ? 4 : 0),
m_hash(h),
m_rc(1) {}
m_rc(1) {
// m_hash_alloc does not need to be a unique identifier.
// We want diverse hash codes such that given expr_cell * c1 and expr_cell * c2,
// if c1 != c2, then there is high probability c1->m_hash_alloc != c2->m_hash_alloc.
// Remark: using pointer address as a hash code is not a good idea.
// - each execution run will behave differently.
// - the hash is not diverse enough
static thread_local unsigned g_hash_alloc_counter = 0;
m_hash_alloc = g_hash_alloc_counter;
g_hash_alloc_counter++;
}
expr_var::expr_var(unsigned idx):
expr_cell(expr_kind::Var, idx, false),

View file

@ -70,7 +70,8 @@ protected:
#else
std::atomic_ushort m_flags;
#endif
unsigned m_hash;
unsigned m_hash; // hash based on the structure of the expression (this is a good hash for structural equality)
unsigned m_hash_alloc; // hash based on 'time' of allocation (this is a good hash for pointer-based equality)
MK_LEAN_RC(); // Declare m_rc counter
void dealloc();
@ -85,6 +86,7 @@ public:
expr_cell(expr_kind k, unsigned h, bool has_mv);
expr_kind kind() const { return static_cast<expr_kind>(m_kind); }
unsigned hash() const { return m_hash; }
unsigned hash_alloc() const { return m_hash_alloc; }
bool has_metavar() const { return (m_flags & 4) != 0; }
};
/**
@ -111,6 +113,7 @@ public:
expr_kind kind() const { return m_ptr->kind(); }
unsigned hash() const { return m_ptr ? m_ptr->hash() : 23; }
unsigned hash_alloc() const { return m_ptr ? m_ptr->hash_alloc() : 23; }
bool has_metavar() const { return m_ptr->has_metavar(); }
expr_cell * raw() const { return m_ptr; }
@ -495,18 +498,25 @@ typedef std::pair<expr_cell*, unsigned> expr_cell_offset;
// Auxiliary functionals
/** \brief Functional object for hashing kernel expressions. */
struct expr_hash { unsigned operator()(expr const & e) const { return e.hash(); } };
/**
\brief Functional object for hashing (based on allocation time) kernel expressions.
This hash is compatible with pointer equality.
\warning This hash is incompatible with structural equality (i.e., std::equal_to<expr>)
*/
struct expr_hash_alloc { unsigned operator()(expr const & e) const { return e.hash_alloc(); } };
/** \brief Functional object for testing pointer equality between kernel expressions. */
struct expr_eqp { bool operator()(expr const & e1, expr const & e2) const { return is_eqp(e1, e2); } };
/** \brief Functional object for hashing kernel expression cells. */
struct expr_cell_hash { unsigned operator()(expr_cell * e) const { return e->hash(); } };
struct expr_cell_hash { unsigned operator()(expr_cell * e) const { return e->hash_alloc(); } };
/** \brief Functional object for testing pointer equality between kernel cell expressions. */
struct expr_cell_eqp { bool operator()(expr_cell * e1, expr_cell * e2) const { return e1 == e2; } };
/** \brief Functional object for hashing a pair (n, k) where n is a kernel expressions, and k is an offset. */
struct expr_offset_hash { unsigned operator()(expr_offset const & p) const { return hash(p.first.hash(), p.second); } };
struct expr_offset_hash { unsigned operator()(expr_offset const & p) const { return hash(p.first.hash_alloc(), p.second); } };
/** \brief Functional object for comparing pairs (expression, offset). */
struct expr_offset_eqp { unsigned operator()(expr_offset const & p1, expr_offset const & p2) const { return is_eqp(p1.first, p2.first) && p1.second == p2.second; } };
/** \brief Functional object for hashing a pair (n, k) where n is a kernel cell expressions, and k is an offset. */
struct expr_cell_offset_hash { unsigned operator()(expr_cell_offset const & p) const { return hash(p.first->hash(), p.second); } };
struct expr_cell_offset_hash { unsigned operator()(expr_cell_offset const & p) const { return hash(p.first->hash_alloc(), p.second); } };
/** \brief Functional object for comparing pairs (expression cell, offset). */
struct expr_cell_offset_eqp { unsigned operator()(expr_cell_offset const & p1, expr_cell_offset const & p2) const { return p1 == p2; } };
// =======================================

View file

@ -11,7 +11,7 @@ Author: Leonardo de Moura
namespace lean {
template<typename T>
using expr_map = typename std::unordered_map<expr, T, expr_hash, expr_eqp>;
using expr_map = typename std::unordered_map<expr, T, expr_hash_alloc, expr_eqp>;
template<typename T>
using expr_offset_map = typename std::unordered_map<expr_offset, T, expr_offset_hash, expr_offset_eqp>;

View file

@ -15,7 +15,7 @@ namespace lean {
// =======================================
// Expression Set && Expression Offset Set
// Remark: to expressions are assumed to be equal if they are "pointer-equal"
typedef std::unordered_set<expr, expr_hash, expr_eqp> expr_set;
typedef std::unordered_set<expr, expr_hash_alloc, expr_eqp> expr_set;
typedef std::unordered_set<expr_offset, expr_offset_hash, expr_offset_eqp> expr_offset_set;
// =======================================
@ -39,7 +39,7 @@ typedef std::unordered_set<expr_cell_offset, expr_cell_offset_hash, expr_cell_of
// garbage collected.
typedef std::pair<expr_cell *, expr_cell *> expr_cell_pair;
struct expr_cell_pair_hash {
unsigned operator()(expr_cell_pair const & p) const { return hash(p.first->hash(), p.second->hash()); }
unsigned operator()(expr_cell_pair const & p) const { return hash(p.first->hash_alloc(), p.second->hash_alloc()); }
};
struct expr_cell_pair_eqp {
bool operator()(expr_cell_pair const & p1, expr_cell_pair const & p2) const {

View file

@ -68,7 +68,7 @@ value_stack extend(value_stack const & s, svalue const & v) { return cons(v, s);
/** \brief Expression normalizer. */
class normalizer::imp {
typedef scoped_map<expr, svalue, expr_hash, expr_eqp> cache;
typedef scoped_map<expr, svalue, expr_hash_alloc, expr_eqp> cache;
environment::weak_ref m_env;
context m_ctx;

View file

@ -21,7 +21,7 @@ namespace lean {
*/
class replace_visitor {
protected:
typedef scoped_map<expr, expr, expr_hash, expr_eqp> cache;
typedef scoped_map<expr, expr, expr_hash_alloc, expr_eqp> cache;
cache m_cache;
context m_ctx;

View file

@ -20,7 +20,7 @@ namespace lean {
static name g_x_name("x");
/** \brief Auxiliary functional object used to implement infer_type. */
class type_checker::imp {
typedef scoped_map<expr, expr, expr_hash, expr_eqp> cache;
typedef scoped_map<expr, expr, expr_hash_alloc, expr_eqp> cache;
typedef buffer<unification_constraint> unification_constraints;
environment::weak_ref m_env;

View file

@ -15,6 +15,10 @@ typedef std::pair<expr, expr> expr_pair;
struct expr_pair_hash {
unsigned operator()(expr_pair const & p) const { return hash(p.first.hash(), p.second.hash()); }
};
/** \brief Functional object for hashing expression pairs (based on their allocation time). */
struct expr_pair_hash_alloc {
unsigned operator()(expr_pair const & p) const { return hash(p.first.hash_alloc(), p.second.hash_alloc()); }
};
/** \brief Functional object for comparing expression pairs. */
struct expr_pair_eq {
unsigned operator()(expr_pair const & p1, expr_pair const & p2) const { return p1.first == p2.first && p1.second == p2.second; }

View file

@ -6,6 +6,7 @@ Author: Leonardo de Moura
*/
#include <tuple>
#include <unordered_set>
#include <functional>
#include "util/buffer.h"
#include "library/max_sharing.h"
@ -15,8 +16,7 @@ namespace lean {
shared sub-expressions.
*/
struct max_sharing_fn::imp {
struct expr_struct_eq { unsigned operator()(expr const & e1, expr const & e2) const { return e1 == e2; }};
typedef typename std::unordered_set<expr, expr_hash, expr_struct_eq> expr_cache;
typedef typename std::unordered_set<expr, expr_hash, std::equal_to<expr>> expr_cache;
expr_cache m_cache;

View file

@ -20,7 +20,7 @@ Author: Leonardo de Moura
namespace lean {
static name g_x_name("x");
class type_inferer::imp {
typedef scoped_map<expr, expr, expr_hash, expr_eqp> cache;
typedef scoped_map<expr, expr, expr_hash_alloc, expr_eqp> cache;
typedef buffer<unification_constraint> unification_constraints;
environment m_env;

21
tests/lua/big.lua Normal file
View file

@ -0,0 +1,21 @@
f, a, b = Consts("f, a, b")
nodes = {}
function mk_big(num)
local r
if num == 0 then
r = f(a, b)
else
r = f(mk_big(num-1), mk_big(num-1))
end
return r
end
function size(e)
local r = 0
e:for_each(function(e, o) assert(e:is_app() or e:is_constant()); r = r + 1 end)
return r
end
local F = mk_big(14)
print(size(F))