perf(kernel/for_each_fn): use cache stack trick at for_each_fn

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
2014-07-23 18:51:39 -07:00 · 2014-07-23 18:51:39 -07:00 · 1f2099e298
commit 1f2099e298
parent 7d25158254
3 changed files with 88 additions and 37 deletions
--- a/src/kernel/cache_stack.h
+++ b/src/kernel/cache_stack.h
@ -0,0 +1,42 @@
 /*
 Copyright (c) 2013 Microsoft Corporation. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
 Author: Leonardo de Moura
 */
 #pragma once
 #include <vector>
 #include <memory>
 #include "util/debug.h"
 /** \brief Macro for creating a stack of objects of type Cache in thread local storage.
    The argument \c Arg is provided to every new instance of Cache.
    The macro provides the helper class Cache_ref that "reuses" cache objects from the stack.
 */
 #define MK_CACHE_STACK(Cache, Arg)                                      \
 struct Cache ## _stack {                                                \
    unsigned                                     m_top;                 \
    std::vector<std::unique_ptr<Cache>> m_cache_stack;                  \
    Cache ## _stack():m_top(0) {}                                       \
 };                                                                      \
 MK_THREAD_LOCAL_GET_DEF(Cache ## _stack, get_ ## Cache ## _stack);      \
 class Cache ## _ref {                                                   \
    Cache * m_cache;                                                    \
 public:                                                                 \
    Cache ## _ref() {                                                   \
        Cache ## _stack & s = get_ ## Cache ## _stack();                \
        lean_assert(s.m_top <= s.m_cache_stack.size());                 \
        if (s.m_top == s.m_cache_stack.size())                          \
            s.m_cache_stack.push_back(std::unique_ptr<Cache>(new Cache(Arg))); \
        m_cache = s.m_cache_stack[s.m_top].get();                       \
        s.m_top++;                                                      \
    }                                                                   \
    ~Cache ## _ref() {                                                  \
        Cache ## _stack & s = get_ ## Cache ## _stack();                \
        lean_assert(s.m_top > 0);                                       \
        s.m_top--;                                                      \
        m_cache->clear();                                               \
    }                                                                   \
    Cache * operator->() const { return m_cache; }                      \
 };                                                                      \
--- a/src/kernel/for_each_fn.cpp
+++ b/src/kernel/for_each_fn.cpp
@ -4,12 +4,52 @@ Released under Apache 2.0 license as described in the file LICENSE.
 Author: Leonardo de Moura
 */
 #include <vector>
 #include <utility>
 #include "util/flet.h"
 #include "kernel/for_each_fn.h"
 #include "kernel/cache_stack.h"
 #ifndef LEAN_DEFAULT_FOR_EACH_CACHE_CAPACITY
 #define LEAN_DEFAULT_FOR_EACH_CACHE_CAPACITY 1024*8
 #endif
 namespace lean {
 struct for_each_cache {
    struct entry {
        expr_cell const * m_cell;
        unsigned          m_offset;
        entry():m_cell(nullptr) {}
    };
    unsigned              m_capacity;
    std::vector<entry>    m_cache;
    std::vector<unsigned> m_used;
    for_each_cache(unsigned c):m_capacity(c), m_cache(c) {}
    bool visited(expr const & e, unsigned offset) {
        unsigned i = hash(e.hash_alloc(), offset) % m_capacity;
        if (m_cache[i].m_cell == e.raw() && m_cache[i].m_offset == offset) {
            return true;
        } else {
            if (m_cache[i].m_cell == nullptr)
                m_used.push_back(i);
            m_cache[i].m_cell   = e.raw();
            m_cache[i].m_offset = offset;
            return false;
        }
    }
    void clear() {
        for (unsigned i : m_used)
            m_cache[i].m_cell = nullptr;
        m_used.clear();
    }
 };
 MK_CACHE_STACK(for_each_cache, LEAN_DEFAULT_FOR_EACH_CACHE_CAPACITY)
 class for_each_fn {
-    std::unique_ptr<expr_cell_offset_set>       m_visited;
+    for_each_cache_ref                          m_cache;
    std::function<bool(expr const &, unsigned)> m_f; // NOLINT
    void apply(expr const & e, unsigned offset) {
@ -33,14 +73,8 @@ class for_each_fn {
                break;
            }
-            if (is_shared(e)) {
+            if (is_shared(e) && m_cache->visited(e, offset))
-                expr_cell_offset p(e.raw(), offset);
+                goto begin_loop;
                if (!m_visited)
                    m_visited.reset(new expr_cell_offset_set());
                if (m_visited->find(p) != m_visited->end())
                    goto begin_loop;
                m_visited->insert(p);
            }
            if (!m_f(e, offset))
                goto begin_loop;
--- a/src/kernel/replace_fn.cpp
+++ b/src/kernel/replace_fn.cpp
@ -7,9 +7,10 @@ Author: Leonardo de Moura
 #include <vector>
 #include <memory>
 #include "kernel/replace_fn.h"
 #include "kernel/cache_stack.h"
 #ifndef LEAN_DEFAULT_REPLACE_CACHE_CAPACITY
-#define LEAN_DEFAULT_REPLACE_CACHE_CAPACITY 1024*32
+#define LEAN_DEFAULT_REPLACE_CACHE_CAPACITY 1024*8
 #endif
 namespace lean {
@ -51,33 +52,7 @@ struct replace_cache {
    }
 };
-struct replace_cache_stack {
+MK_CACHE_STACK(replace_cache, LEAN_DEFAULT_REPLACE_CACHE_CAPACITY)
    unsigned                                    m_top;
    std::vector<std::unique_ptr<replace_cache>> m_cache_stack;
    replace_cache_stack():m_top(0) {}
 };
 MK_THREAD_LOCAL_GET_DEF(replace_cache_stack, get_replace_cache_stack);
 class replace_cache_ref {
    replace_cache * m_cache;
 public:
    replace_cache_ref() {
        replace_cache_stack & s = get_replace_cache_stack();
        lean_assert(s.m_top <= s.m_cache_stack.size());
        if (s.m_top == s.m_cache_stack.size())
            s.m_cache_stack.push_back(std::unique_ptr<replace_cache>(new replace_cache(LEAN_DEFAULT_REPLACE_CACHE_CAPACITY)));
        m_cache = s.m_cache_stack[s.m_top].get();
        s.m_top++;
    }
    ~replace_cache_ref() {
        replace_cache_stack & s = get_replace_cache_stack();
        lean_assert(s.m_top > 0);
        s.m_top--;
        m_cache->clear();
    }
    replace_cache * operator->() const { return m_cache; }
 };
 /**
   \brief Functional for applying <tt>F</tt> to the subexpressions of a given expression.