From fbb4867f49835bca00b4d85491fbf650f5fdeb7e Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Tue, 1 Jul 2014 22:35:30 +0200 Subject: [PATCH] Fixed core affinity handling, specified TLS dialect on Linux - Fixed core affinity handling for Linux clusters with CPU quotas (e.g. the ETH Brutus cluster). - Added an explicit -mtls-dialect flag to the Linux config.py files to prevent crashes when loading the Python plugin in certain settings (particularly: the ETH cluster). --- build/config-linux-gcc-debug.py | 2 +- build/config-linux-gcc.py | 2 +- include/mitsuba/core/sched.h | 15 ++++++++++ src/libcore/sched.cpp | 3 +- src/libcore/thread.cpp | 53 +++++++++++++++++++++++++-------- src/libcore/util.cpp | 35 ++++++++++++++++++++-- src/mitsuba/mitsuba.cpp | 7 +++-- src/mitsuba/mtsutil.cpp | 7 +++-- src/mtsgui/mainwindow.cpp | 35 ++++++++++++++-------- 9 files changed, 124 insertions(+), 35 deletions(-) diff --git a/build/config-linux-gcc-debug.py b/build/config-linux-gcc-debug.py index ccd1d7e4..3ce1fbb0 100644 --- a/build/config-linux-gcc-debug.py +++ b/build/config-linux-gcc-debug.py @@ -4,7 +4,7 @@ BUILDDIR = '#build/debug' DISTDIR = '#dist' CXX = 'g++' CC = 'gcc' -CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden'] +CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2'] LINKFLAGS = [] SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', 
'-lstdc++'] BASEINCLUDE = ['#include'] diff --git a/build/config-linux-gcc.py b/build/config-linux-gcc.py index 5bc8b6ae..77c9b753 100644 --- a/build/config-linux-gcc.py +++ b/build/config-linux-gcc.py @@ -4,7 +4,7 @@ BUILDDIR = '#build/release' DISTDIR = '#dist' CXX = 'g++' CC = 'gcc' -CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden'] +CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2'] LINKFLAGS = [] SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++'] BASEINCLUDE = ['#include'] diff --git a/include/mitsuba/core/sched.h b/include/mitsuba/core/sched.h index 8cc03bb0..d9daf0fc 100644 --- a/include/mitsuba/core/sched.h +++ b/include/mitsuba/core/sched.h @@ -758,6 +758,21 @@ protected: */ class MTS_EXPORT_CORE LocalWorker : public Worker { public: + /** + * \brief Create a new local worker thread + * + * \param coreID + * When a CPU core ID (>=0) is specified here, the worker + * thread will attempt to register core affinity with the + * operating system. Passing -1 disables this. 
+ * + * \param name + * An identifying string for this thread + * + * \param priority + * The desired thread priority (not supported on some + * operating systems) + */ LocalWorker(int coreID, const std::string &name, Thread::EThreadPriority priority = Thread::ENormalPriority); diff --git a/src/libcore/sched.cpp b/src/libcore/sched.cpp index c95f2218..b6f97336 100644 --- a/src/libcore/sched.cpp +++ b/src/libcore/sched.cpp @@ -633,7 +633,8 @@ void Worker::start(Scheduler *scheduler, int workerIndex, int coreOffset) { LocalWorker::LocalWorker(int coreID, const std::string &name, Thread::EThreadPriority priority) : Worker(name) { - setCoreAffinity(coreID); + if (coreID >= 0) + setCoreAffinity(coreID); m_coreCount = 1; #if !defined(__LINUX__) /* Don't set thead priority on Linux, since it uses diff --git a/src/libcore/thread.cpp b/src/libcore/thread.cpp index d0351bb8..621ea42b 100644 --- a/src/libcore/thread.cpp +++ b/src/libcore/thread.cpp @@ -324,24 +324,51 @@ void Thread::setCoreAffinity(int coreID) { if (getenv("VALGRIND_OPTS") != NULL) return; - int nCores = getCoreCount(); - cpu_set_t *cpuset = CPU_ALLOC(nCores); - if (cpuset == NULL) - Log(EError, "Thread::setCoreAffinity(): could not allocate cpu_set_t"); - + int nCores = sysconf(_SC_NPROCESSORS_CONF); size_t size = CPU_ALLOC_SIZE(nCores); + cpu_set_t *cpuset = CPU_ALLOC(nCores); CPU_ZERO_S(size, cpuset); - if (coreID != -1 && coreID < nCores) { - CPU_SET_S(coreID, size, cpuset); - } else { - for (int i=0; ithread.native_handle(); - int retval = pthread_setaffinity_np(threadID, size, cpuset); - if (retval) - Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed: %s", strerror(errno)); + int retval = pthread_getaffinity_np(threadID, size, cpuset); + if (retval) { + Log(EWarn, "Thread::setCoreAffinity(): pthread_getaffinity_np(): could " + "not read thread affinity map: %s", strerror(retval)); + CPU_FREE(cpuset); + return; + } + + int actualCoreID = -1, available = 0; + for (int i=0; 
iregisterWorker(new LocalWorker(i, formatString("wrk%i", i))); + scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1, + formatString("wrk%i", i))); std::vector hosts = tokenize(networkHosts, ";"); /* Establish network connections to nested servers */ diff --git a/src/mitsuba/mtsutil.cpp b/src/mitsuba/mtsutil.cpp index 8d60be3f..2ef15222 100644 --- a/src/mitsuba/mtsutil.cpp +++ b/src/mitsuba/mtsutil.cpp @@ -141,7 +141,7 @@ int mtsutil(int argc, char **argv) { try { /* Default settings */ - int nprocs = getCoreCount(); + int nprocs_avail = getCoreCount(), nprocs = nprocs_avail; std::string nodeName = getHostName(), networkHosts = "", destFile=""; bool quietMode = false; @@ -233,8 +233,11 @@ int mtsutil(int argc, char **argv) { /* Configure the scheduling subsystem */ Scheduler *scheduler = Scheduler::getInstance(); + bool useCoreAffinity = nprocs == nprocs_avail; for (int i=0; iregisterWorker(new LocalWorker(i, formatString("wrk%i", i))); + scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1, + formatString("wrk%i", i))); + std::vector hosts = tokenize(networkHosts, ";"); /* Establish network connections to nested servers */ diff --git a/src/mtsgui/mainwindow.cpp b/src/mtsgui/mainwindow.cpp index 4ba79eff..2ebb2daa 100644 --- a/src/mtsgui/mainwindow.cpp +++ b/src/mtsgui/mainwindow.cpp @@ -53,7 +53,7 @@ extern bool create_symlinks(); -static int localWorkerCtr = 0, remoteWorkerCtr = 0; +static int remoteWorkerCtr = 0; MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent), ui(new Ui::MainWindow), @@ -310,8 +310,10 @@ bool MainWindow::initWorkersProcessArgv() { m_workerPriority = (Thread::EThreadPriority) settings.value("workerPriority", (int) Thread::ELowPriority).toInt(); + bool useCoreAffinity = localWorkerCount == getCoreCount(); for (int i=0; iregisterWorker(new LocalWorker(i, formatString("wrk%i", localWorkerCtr++), m_workerPriority)); + scheduler->registerWorker(new LocalWorker(useCoreAffinity ? 
i : -1, + formatString("wrk%i", i), m_workerPriority)); int networkConnections = 0; QList connectionData = settings.value("connections").toList(); @@ -359,7 +361,7 @@ bool MainWindow::initWorkersProcessArgv() { QMessageBox::warning(this, tr("Scheduler warning"), tr("There must be at least one worker thread -- forcing creation of one."), QMessageBox::Ok); - scheduler->registerWorker(new LocalWorker(0, formatString("wrk%i", localWorkerCtr++), m_workerPriority)); + scheduler->registerWorker(new LocalWorker(-1, formatString("wrk%i", 0), m_workerPriority)); } for (int i=0; i sched = Scheduler::getInstance(); sched->pause(); - while (d.getLocalWorkerCount() > (int) localWorkers.size()) { - LocalWorker *worker = new LocalWorker(localWorkerCtr, formatString("wrk%i", localWorkerCtr), m_workerPriority); - localWorkerCtr++; - sched->registerWorker(worker); - localWorkers.push_back(worker); - } - while (d.getLocalWorkerCount() < (int) localWorkers.size()) { - Worker *worker = localWorkers.back(); - sched->unregisterWorker(worker); - localWorkers.pop_back(); + + if (localWorkers.size() != d.getLocalWorkerCount()) { + /* Completely remove old workers so that CPU affinities can be reassigned */ + while (!localWorkers.empty()) { + Worker *worker = localWorkers.back(); + sched->unregisterWorker(worker); + localWorkers.pop_back(); + } + int workerCount = std::max(1, d.getLocalWorkerCount()); + bool useCoreAffinity = workerCount == getCoreCount(); + for (int i=0; iregisterWorker(worker); + localWorkers.push_back(worker); + } } + QList removeList, &newConnections = d.getConnections(); for (int i=0; i