Fix to the TLS fix - deletes the local TLS data in the reverse order of creation

The previous fix also had a conceptual error: it allowed the per-thread map to
be modified concurrently, as the behavior of map::find/map::operator[] is to
insert a new element with that key if it is not found.

Also this new fix uses a recursive mutex (also available in C++11) as the same
thread will aquire the lock during conditions such as destruction, e.g. a TLS
entry querying the current thread for logging as part of its destructor.
metadata
Edgar Velazquez-Armendariz 2013-01-26 23:08:11 -05:00
parent 986a0fc91e
commit 4f9315ae1a
1 changed files with 55 additions and 37 deletions

View File

@ -17,19 +17,26 @@
*/ */
#include <mitsuba/core/tls.h> #include <mitsuba/core/tls.h>
#include <boost/thread/mutex.hpp> #include <boost/thread/mutex.hpp>
#include <boost/thread/recursive_mutex.hpp>
#include <boost/thread/locks.hpp> #include <boost/thread/locks.hpp>
#include <boost/unordered_map.hpp> #include <boost/unordered_set.hpp>
#include <set>
#include <boost/scoped_ptr.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#if defined(__OSX__) #if defined(__OSX__)
# include <pthread.h> # include <pthread.h>
#endif #endif
// Always use SSE fence instructions
#include <emmintrin.h>
MTS_NAMESPACE_BEGIN MTS_NAMESPACE_BEGIN
namespace mi = boost::multi_index;
/* The native TLS classes on Linux/MacOS/Windows only support a limited number /* The native TLS classes on Linux/MacOS/Windows only support a limited number
of dynamically allocated entries (usually 1024 or 1088). Furthermore, they of dynamically allocated entries (usually 1024 or 1088). Furthermore, they
do not provide appropriate cleanup semantics when the TLS object or one of do not provide appropriate cleanup semantics when the TLS object or one of
@ -48,16 +55,37 @@ struct TLSEntry {
inline TLSEntry() : data(NULL), destructFunctor(NULL) { } inline TLSEntry() : data(NULL), destructFunctor(NULL) { }
}; };
/// boost multi-index element to act as replacement of map<Key,T>
template<typename T1, typename T2>
struct mutable_pair {
mutable_pair(const T1 &f, const T2 &s) : first(f), second(s) { }
T1 first;
mutable T2 second;
};
/// Per-thread TLS entry map /// Per-thread TLS entry map
struct PerThreadData { struct PerThreadData {
typedef boost::unordered_map<void *, TLSEntry> Map; typedef mutable_pair<void *, TLSEntry> MapData;
typedef mi::member<MapData, void *, &MapData::first> key_member;
struct seq_tag {};
struct key_tag {};
typedef mi::multi_index_container<MapData,
mi::indexed_by<
mi::hashed_unique<mi::tag<key_tag>, key_member>,
mi::sequenced<mi::tag<seq_tag> >
>
> Map;
typedef mi::index<Map, key_tag>::type::iterator key_iterator;
typedef mi::index<Map, seq_tag>::type::reverse_iterator reverse_iterator;
Map map; Map map;
boost::mutex mutex; boost::recursive_mutex mutex;
}; };
/// List of all PerThreadData data structures (one for each thread) /// List of all PerThreadData data structures (one for each thread)
std::set<PerThreadData *> ptdGlobal; boost::unordered_set<PerThreadData *> ptdGlobal;
/// Lock to protect ptdGlobal /// Lock to protect ptdGlobal
boost::mutex ptdGlobalLock; boost::mutex ptdGlobalLock;
@ -82,10 +110,10 @@ struct ThreadLocalBase::ThreadLocalPrivate {
and clean up where necessary */ and clean up where necessary */
boost::lock_guard<boost::mutex> guard(ptdGlobalLock); boost::lock_guard<boost::mutex> guard(ptdGlobalLock);
for (std::set<PerThreadData *>::iterator it = ptdGlobal.begin(); for (boost::unordered_set<PerThreadData *>::iterator it = ptdGlobal.begin();
it != ptdGlobal.end(); ++it) { it != ptdGlobal.end(); ++it) {
PerThreadData *ptd = *it; PerThreadData *ptd = *it;
boost::unique_lock<boost::mutex> lock(ptd->mutex); boost::unique_lock<boost::recursive_mutex> lock(ptd->mutex);
PerThreadData::Map::iterator it2 = ptd->map.find(this); PerThreadData::Map::iterator it2 = ptd->map.find(this);
TLSEntry entry; TLSEntry entry;
@ -115,31 +143,20 @@ struct ThreadLocalBase::ThreadLocalPrivate {
throw std::runtime_error("null per-thread data"); throw std::runtime_error("null per-thread data");
} }
/* Double-checked lock to initialize the data on the first access: void *data;
* Most of the times, the data is already there and thus we don't need boost::lock_guard<boost::recursive_mutex> guard(ptd->mutex);
* a lock. When we need to create the data on the first acess, we PerThreadData::key_iterator it = ptd->map.find(this);
* acquire the lock and check again, as someone else might have created if (EXPECT_TAKEN(it != ptd->map.end())) {
* the data already. Some systems (e.g. Windows Vista) provide special data = it->second.data;
* APIs to guarantee single initialization. See: } else {
* http://en.wikipedia.org/wiki/Double-checked_locking TLSEntry entry;
* http://blogs.msdn.com/b/oldnewthing/archive/2011/04/08/10151258.aspx entry.data = data = constructFunctor();
* http://msdn.microsoft.com/en-us/library/aa363808 entry.destructFunctor = destructFunctor;
* [Links as of January 2013] ptd->map.insert(PerThreadData::MapData(this, entry));
*/ existed = false;
TLSEntry &entry = ptd->map[this];
if (EXPECT_NOT_TAKEN(!entry.data)) {
boost::lock_guard<boost::mutex> guard(ptd->mutex);
if (!entry.data) {
entry.data = constructFunctor();
entry.destructFunctor = destructFunctor;
// Fence for write-release semantics
_mm_sfence();
existed = false;
}
} }
// Fence for read-acquire semantics
_mm_lfence(); return std::make_pair(data, existed);
return std::make_pair(entry.data, existed);
} }
}; };
@ -210,10 +227,11 @@ void destroyLocalTLS() {
PerThreadData *ptd = ptdLocal; PerThreadData *ptd = ptdLocal;
#endif #endif
boost::unique_lock<boost::mutex> lock(ptd->mutex); boost::unique_lock<boost::recursive_mutex> lock(ptd->mutex);
for (PerThreadData::Map::iterator it = ptd->map.begin(); // Destroy the data in reverse order of creation
it != ptd->map.end(); ++it) { for (PerThreadData::reverse_iterator it = mi::get<PerThreadData::seq_tag>(ptd->map).rbegin();
it != mi::get<PerThreadData::seq_tag>(ptd->map).rend(); ++it) {
TLSEntry &entry = it->second; TLSEntry &entry = it->second;
entry.destructFunctor(entry.data); entry.destructFunctor(entry.data);
} }