Fixed core affinity handling, specified TLS dialect on Linux
- Fixed core affinity handling for Linux clusters with CPU quotas (e.g. the ETH Brutus cluster).
- Added an explicit -mtls-dialect flag to the Linux config.py files to prevent crashes when loading the Python plugin in certain settings (in particular, on the ETH cluster).
metadata
parent
a1eecbb55a
commit
fbb4867f49
|
@ -4,7 +4,7 @@ BUILDDIR = '#build/debug'
|
||||||
DISTDIR = '#dist'
|
DISTDIR = '#dist'
|
||||||
CXX = 'g++'
|
CXX = 'g++'
|
||||||
CC = 'gcc'
|
CC = 'gcc'
|
||||||
CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden']
|
CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2']
|
||||||
LINKFLAGS = []
|
LINKFLAGS = []
|
||||||
SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
|
SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
|
||||||
BASEINCLUDE = ['#include']
|
BASEINCLUDE = ['#include']
|
||||||
|
|
|
@ -4,7 +4,7 @@ BUILDDIR = '#build/release'
|
||||||
DISTDIR = '#dist'
|
DISTDIR = '#dist'
|
||||||
CXX = 'g++'
|
CXX = 'g++'
|
||||||
CC = 'gcc'
|
CC = 'gcc'
|
||||||
CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden']
|
CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2']
|
||||||
LINKFLAGS = []
|
LINKFLAGS = []
|
||||||
SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
|
SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
|
||||||
BASEINCLUDE = ['#include']
|
BASEINCLUDE = ['#include']
|
||||||
|
|
|
@ -758,6 +758,21 @@ protected:
|
||||||
*/
|
*/
|
||||||
class MTS_EXPORT_CORE LocalWorker : public Worker {
|
class MTS_EXPORT_CORE LocalWorker : public Worker {
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* \brief Create a new local worker thread
|
||||||
|
*
|
||||||
|
* \param coreID
|
||||||
|
* When an CPU core ID (>=0) is specified here, the worker
|
||||||
|
* thread will attempt to register core affinity with the
|
||||||
|
* operating system. Passing -1 disables this.
|
||||||
|
*
|
||||||
|
* \param name
|
||||||
|
* An identifying string for this thread
|
||||||
|
*
|
||||||
|
* \param priority
|
||||||
|
* The desired thread priority (not supported on some
|
||||||
|
* operating systems)
|
||||||
|
*/
|
||||||
LocalWorker(int coreID, const std::string &name,
|
LocalWorker(int coreID, const std::string &name,
|
||||||
Thread::EThreadPriority priority = Thread::ENormalPriority);
|
Thread::EThreadPriority priority = Thread::ENormalPriority);
|
||||||
|
|
||||||
|
|
|
@ -633,7 +633,8 @@ void Worker::start(Scheduler *scheduler, int workerIndex, int coreOffset) {
|
||||||
|
|
||||||
LocalWorker::LocalWorker(int coreID, const std::string &name,
|
LocalWorker::LocalWorker(int coreID, const std::string &name,
|
||||||
Thread::EThreadPriority priority) : Worker(name) {
|
Thread::EThreadPriority priority) : Worker(name) {
|
||||||
setCoreAffinity(coreID);
|
if (coreID >= 0)
|
||||||
|
setCoreAffinity(coreID);
|
||||||
m_coreCount = 1;
|
m_coreCount = 1;
|
||||||
#if !defined(__LINUX__)
|
#if !defined(__LINUX__)
|
||||||
/* Don't set thead priority on Linux, since it uses
|
/* Don't set thead priority on Linux, since it uses
|
||||||
|
|
|
@ -324,24 +324,51 @@ void Thread::setCoreAffinity(int coreID) {
|
||||||
if (getenv("VALGRIND_OPTS") != NULL)
|
if (getenv("VALGRIND_OPTS") != NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int nCores = getCoreCount();
|
int nCores = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
cpu_set_t *cpuset = CPU_ALLOC(nCores);
|
|
||||||
if (cpuset == NULL)
|
|
||||||
Log(EError, "Thread::setCoreAffinity(): could not allocate cpu_set_t");
|
|
||||||
|
|
||||||
size_t size = CPU_ALLOC_SIZE(nCores);
|
size_t size = CPU_ALLOC_SIZE(nCores);
|
||||||
|
cpu_set_t *cpuset = CPU_ALLOC(nCores);
|
||||||
CPU_ZERO_S(size, cpuset);
|
CPU_ZERO_S(size, cpuset);
|
||||||
if (coreID != -1 && coreID < nCores) {
|
if (cpuset == NULL) {
|
||||||
CPU_SET_S(coreID, size, cpuset);
|
Log(EWarn, "Thread::setCoreAffinity(): could not allocate cpu_set_t");
|
||||||
} else {
|
return;
|
||||||
for (int i=0; i<nCores; ++i)
|
|
||||||
CPU_SET_S(i, size, cpuset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const pthread_t threadID = d->thread.native_handle();
|
const pthread_t threadID = d->thread.native_handle();
|
||||||
int retval = pthread_setaffinity_np(threadID, size, cpuset);
|
int retval = pthread_getaffinity_np(threadID, size, cpuset);
|
||||||
if (retval)
|
if (retval) {
|
||||||
Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed: %s", strerror(errno));
|
Log(EWarn, "Thread::setCoreAffinity(): pthread_getaffinity_np(): could "
|
||||||
|
"not read thread affinity map: %s", strerror(retval));
|
||||||
|
CPU_FREE(cpuset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int actualCoreID = -1, available = 0;
|
||||||
|
for (int i=0; i<nCores; ++i) {
|
||||||
|
if (!CPU_ISSET_S(i, size, cpuset))
|
||||||
|
continue;
|
||||||
|
if (available++ == coreID) {
|
||||||
|
actualCoreID = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (actualCoreID == -1) {
|
||||||
|
Log(EWarn, "Thread::setCoreAffinity(): out of bounds: %i/%i cores available, requested #%i!",
|
||||||
|
available, nCores, coreID);
|
||||||
|
CPU_FREE(cpuset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
CPU_ZERO_S(size, cpuset);
|
||||||
|
CPU_SET_S(actualCoreID, size, cpuset);
|
||||||
|
|
||||||
|
retval = pthread_setaffinity_np(threadID, size, cpuset);
|
||||||
|
if (retval) {
|
||||||
|
Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed: %s", strerror(retval));
|
||||||
|
CPU_FREE(cpuset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
CPU_FREE(cpuset);
|
CPU_FREE(cpuset);
|
||||||
#elif defined(__WINDOWS__)
|
#elif defined(__WINDOWS__)
|
||||||
int nCores = getCoreCount();
|
int nCores = getCoreCount();
|
||||||
|
|
|
@ -143,19 +143,50 @@ void freeAligned(void *ptr) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int __cached_core_count = 0;
|
||||||
|
|
||||||
int getCoreCount() {
|
int getCoreCount() {
|
||||||
|
// assumes atomic word size memory access
|
||||||
|
if (__cached_core_count)
|
||||||
|
return __cached_core_count;
|
||||||
|
|
||||||
#if defined(__WINDOWS__)
|
#if defined(__WINDOWS__)
|
||||||
SYSTEM_INFO sys_info;
|
SYSTEM_INFO sys_info;
|
||||||
GetSystemInfo(&sys_info);
|
GetSystemInfo(&sys_info);
|
||||||
|
__cached_core_count = sys_info.dwNumberOfProcessors;
|
||||||
return sys_info.dwNumberOfProcessors;
|
return sys_info.dwNumberOfProcessors;
|
||||||
#elif defined(__OSX__)
|
#elif defined(__OSX__)
|
||||||
int nprocs;
|
int nprocs;
|
||||||
size_t nprocsSize = sizeof(int);
|
size_t nprocsSize = sizeof(int);
|
||||||
if (sysctlbyname("hw.activecpu", &nprocs, &nprocsSize, NULL, 0))
|
if (sysctlbyname("hw.activecpu", &nprocs, &nprocsSize, NULL, 0))
|
||||||
SLog(EError, "Could not detect the number of processors!");
|
SLog(EError, "Could not detect the number of processors!");
|
||||||
return (int) nprocs;
|
__cached_core_count = nprocs;
|
||||||
|
return nprocs;
|
||||||
#else
|
#else
|
||||||
return sysconf(_SC_NPROCESSORS_CONF);
|
/* Determine the number of present cores */
|
||||||
|
int nCores = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
|
|
||||||
|
/* Some of the cores may not be available to the user
|
||||||
|
(e.g. on certain cluster nodes) -- determine the number
|
||||||
|
of actual available cores here. */
|
||||||
|
size_t size = CPU_ALLOC_SIZE(nCores);
|
||||||
|
cpu_set_t *cpuset = CPU_ALLOC(nCores);
|
||||||
|
CPU_ZERO_S(size, cpuset);
|
||||||
|
int retval = pthread_getaffinity_np(pthread_self(), size, cpuset);
|
||||||
|
if (retval) {
|
||||||
|
SLog(EWarn, "getCoreCount(): pthread_getaffinity_np(): could "
|
||||||
|
"not read thread affinity map: %s", strerror(retval));
|
||||||
|
__cached_core_count = nCores;
|
||||||
|
CPU_FREE(cpuset);
|
||||||
|
return nCores;
|
||||||
|
}
|
||||||
|
|
||||||
|
int availableCores = 0;
|
||||||
|
for (int i=0; i<nCores; ++i)
|
||||||
|
availableCores += CPU_ISSET_S(i, size, cpuset) ? 1 : 0;
|
||||||
|
CPU_FREE(cpuset);
|
||||||
|
__cached_core_count = availableCores;
|
||||||
|
return availableCores;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -130,7 +130,8 @@ int mitsuba_app(int argc, char **argv) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
/* Default settings */
|
/* Default settings */
|
||||||
int nprocs = getCoreCount(), numParallelScenes = 1;
|
int nprocs_avail = getCoreCount(), nprocs = nprocs_avail;
|
||||||
|
int numParallelScenes = 1;
|
||||||
std::string nodeName = getHostName(),
|
std::string nodeName = getHostName(),
|
||||||
networkHosts = "", destFile="";
|
networkHosts = "", destFile="";
|
||||||
bool quietMode = false, progressBars = true, skipExisting = false;
|
bool quietMode = false, progressBars = true, skipExisting = false;
|
||||||
|
@ -257,8 +258,10 @@ int mitsuba_app(int argc, char **argv) {
|
||||||
|
|
||||||
/* Configure the scheduling subsystem */
|
/* Configure the scheduling subsystem */
|
||||||
Scheduler *scheduler = Scheduler::getInstance();
|
Scheduler *scheduler = Scheduler::getInstance();
|
||||||
|
bool useCoreAffinity = nprocs == nprocs_avail;
|
||||||
for (int i=0; i<nprocs; ++i)
|
for (int i=0; i<nprocs; ++i)
|
||||||
scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
|
scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
|
||||||
|
formatString("wrk%i", i)));
|
||||||
std::vector<std::string> hosts = tokenize(networkHosts, ";");
|
std::vector<std::string> hosts = tokenize(networkHosts, ";");
|
||||||
|
|
||||||
/* Establish network connections to nested servers */
|
/* Establish network connections to nested servers */
|
||||||
|
|
|
@ -141,7 +141,7 @@ int mtsutil(int argc, char **argv) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
/* Default settings */
|
/* Default settings */
|
||||||
int nprocs = getCoreCount();
|
int nprocs_avail = getCoreCount(), nprocs = nprocs_avail;
|
||||||
std::string nodeName = getHostName(),
|
std::string nodeName = getHostName(),
|
||||||
networkHosts = "", destFile="";
|
networkHosts = "", destFile="";
|
||||||
bool quietMode = false;
|
bool quietMode = false;
|
||||||
|
@ -233,8 +233,11 @@ int mtsutil(int argc, char **argv) {
|
||||||
|
|
||||||
/* Configure the scheduling subsystem */
|
/* Configure the scheduling subsystem */
|
||||||
Scheduler *scheduler = Scheduler::getInstance();
|
Scheduler *scheduler = Scheduler::getInstance();
|
||||||
|
bool useCoreAffinity = nprocs == nprocs_avail;
|
||||||
for (int i=0; i<nprocs; ++i)
|
for (int i=0; i<nprocs; ++i)
|
||||||
scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
|
scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
|
||||||
|
formatString("wrk%i", i)));
|
||||||
|
|
||||||
std::vector<std::string> hosts = tokenize(networkHosts, ";");
|
std::vector<std::string> hosts = tokenize(networkHosts, ";");
|
||||||
|
|
||||||
/* Establish network connections to nested servers */
|
/* Establish network connections to nested servers */
|
||||||
|
|
|
@ -53,7 +53,7 @@
|
||||||
|
|
||||||
extern bool create_symlinks();
|
extern bool create_symlinks();
|
||||||
|
|
||||||
static int localWorkerCtr = 0, remoteWorkerCtr = 0;
|
static int remoteWorkerCtr = 0;
|
||||||
|
|
||||||
MainWindow::MainWindow(QWidget *parent) :
|
MainWindow::MainWindow(QWidget *parent) :
|
||||||
QMainWindow(parent), ui(new Ui::MainWindow),
|
QMainWindow(parent), ui(new Ui::MainWindow),
|
||||||
|
@ -310,8 +310,10 @@ bool MainWindow::initWorkersProcessArgv() {
|
||||||
|
|
||||||
m_workerPriority = (Thread::EThreadPriority)
|
m_workerPriority = (Thread::EThreadPriority)
|
||||||
settings.value("workerPriority", (int) Thread::ELowPriority).toInt();
|
settings.value("workerPriority", (int) Thread::ELowPriority).toInt();
|
||||||
|
bool useCoreAffinity = localWorkerCount == getCoreCount();
|
||||||
for (int i=0; i<localWorkerCount; ++i)
|
for (int i=0; i<localWorkerCount; ++i)
|
||||||
scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
|
scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
|
||||||
|
formatString("wrk%i", i), m_workerPriority));
|
||||||
|
|
||||||
int networkConnections = 0;
|
int networkConnections = 0;
|
||||||
QList<QVariant> connectionData = settings.value("connections").toList();
|
QList<QVariant> connectionData = settings.value("connections").toList();
|
||||||
|
@ -359,7 +361,7 @@ bool MainWindow::initWorkersProcessArgv() {
|
||||||
QMessageBox::warning(this, tr("Scheduler warning"),
|
QMessageBox::warning(this, tr("Scheduler warning"),
|
||||||
tr("There must be at least one worker thread -- forcing creation of one."),
|
tr("There must be at least one worker thread -- forcing creation of one."),
|
||||||
QMessageBox::Ok);
|
QMessageBox::Ok);
|
||||||
scheduler->registerWorker(new LocalWorker(0, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
|
scheduler->registerWorker(new LocalWorker(-1, formatString("wrk%i", 0), m_workerPriority));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i=0; i<toBeLoaded.size(); ++i)
|
for (int i=0; i<toBeLoaded.size(); ++i)
|
||||||
|
@ -1355,17 +1357,24 @@ void MainWindow::on_actionSettings_triggered() {
|
||||||
if (localWorkersChanged || m_connections != d.getConnections()) {
|
if (localWorkersChanged || m_connections != d.getConnections()) {
|
||||||
ref<Scheduler> sched = Scheduler::getInstance();
|
ref<Scheduler> sched = Scheduler::getInstance();
|
||||||
sched->pause();
|
sched->pause();
|
||||||
while (d.getLocalWorkerCount() > (int) localWorkers.size()) {
|
|
||||||
LocalWorker *worker = new LocalWorker(localWorkerCtr, formatString("wrk%i", localWorkerCtr), m_workerPriority);
|
if (localWorkers.size() != d.getLocalWorkerCount()) {
|
||||||
localWorkerCtr++;
|
/* Completely remove old workers so that CPU affinities can be reassigned */
|
||||||
sched->registerWorker(worker);
|
while (!localWorkers.empty()) {
|
||||||
localWorkers.push_back(worker);
|
Worker *worker = localWorkers.back();
|
||||||
}
|
sched->unregisterWorker(worker);
|
||||||
while (d.getLocalWorkerCount() < (int) localWorkers.size()) {
|
localWorkers.pop_back();
|
||||||
Worker *worker = localWorkers.back();
|
}
|
||||||
sched->unregisterWorker(worker);
|
int workerCount = std::max(1, d.getLocalWorkerCount());
|
||||||
localWorkers.pop_back();
|
bool useCoreAffinity = workerCount == getCoreCount();
|
||||||
|
for (int i=0; i<workerCount; ++i) {
|
||||||
|
LocalWorker *worker = new LocalWorker(useCoreAffinity ? i : -1,
|
||||||
|
formatString("wrk%i", i), m_workerPriority);
|
||||||
|
sched->registerWorker(worker);
|
||||||
|
localWorkers.push_back(worker);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
QList<ServerConnection> removeList,
|
QList<ServerConnection> removeList,
|
||||||
&newConnections = d.getConnections();
|
&newConnections = d.getConnections();
|
||||||
for (int i=0; i<m_connections.size(); ++i) {
|
for (int i=0; i<m_connections.size(); ++i) {
|
||||||
|
|
Loading…
Reference in New Issue