Fixed core affinity handling, specified TLS dialect on Linux
- Fixed core affinity handling for Linux clusters with CPU quotas (e.g. the ETH Brutus cluster).
- Added an explicit -mtls-dialect flag to the Linux config.py files to prevent crashes when loading the Python plugin in certain settings (particularly: the ETH cluster).
parent a1eecbb55a
commit fbb4867f49
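As background for the affinity part of this commit: on quota-managed cluster nodes, the core count reported by sysconf(_SC_NPROCESSORS_CONF) can exceed the number of cores the process is actually allowed to run on. A minimal standalone sketch (not part of this commit) that makes the difference visible on Linux:

```cpp
// Illustration only: compare the number of configured cores with the size of
// the process' CPU affinity mask. On a node with a CPU quota the two differ.
// g++ defines _GNU_SOURCE by default, which sched_getaffinity()/CPU_COUNT need.
#include <cstdio>
#include <sched.h>
#include <unistd.h>

int main() {
    long configured = sysconf(_SC_NPROCESSORS_CONF); // cores present in the machine

    cpu_set_t mask;
    CPU_ZERO(&mask);
    int available = 0;
    if (sched_getaffinity(0, sizeof(mask), &mask) == 0)
        available = CPU_COUNT(&mask);                // cores this process may use

    std::printf("configured: %ld, available: %d\n", configured, available);
    return 0;
}
```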
@@ -4,7 +4,7 @@ BUILDDIR = '#build/debug'
 DISTDIR = '#dist'
 CXX = 'g++'
 CC = 'gcc'
-CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden']
+CXXFLAGS = ['-O0', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fno-omit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2']
 LINKFLAGS = []
 SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
 BASEINCLUDE = ['#include']
@@ -4,7 +4,7 @@ BUILDDIR = '#build/release'
 DISTDIR = '#dist'
 CXX = 'g++'
 CC = 'gcc'
-CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden']
+CXXFLAGS = ['-O3', '-Wall', '-g', '-pipe', '-march=nocona', '-msse2', '-ftree-vectorize', '-mfpmath=sse', '-funsafe-math-optimizations', '-fno-rounding-math', '-fno-signaling-nans', '-fno-math-errno', '-fomit-frame-pointer', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fopenmp', '-fvisibility=hidden', '-mtls-dialect=gnu2']
 LINKFLAGS = []
 SHLINKFLAGS = ['-rdynamic', '-shared', '-fPIC', '-lstdc++']
 BASEINCLUDE = ['#include']
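On the TLS side, -mtls-dialect=gnu2 (added to both CXXFLAGS lists above) makes GCC emit TLS-descriptor based accesses to thread-local variables; the commit message ties this to crashes seen when the Python plugin is loaded on the ETH cluster. The snippet below is a hypothetical illustration, not Mitsuba code, of the kind of thread-local data in a dlopen'ed plugin whose access sequence the flag changes:

```cpp
// Hypothetical plugin source (illustration only). Thread-local variables in a
// shared object loaded via dlopen() use the dynamic TLS model; the
// -mtls-dialect=gnu2 flag selects TLS descriptors for those accesses.
// Build as a plugin: g++ -fPIC -shared -mtls-dialect=gnu2 plugin_tls.cpp -o plugin_tls.so
#include <cstddef>

// GCC's __thread extension: one slot per thread in this module's TLS block.
static __thread std::size_t tlsCounter = 0;

extern "C" std::size_t bumpCounter() {
    return ++tlsCounter;   // access compiled according to the selected TLS dialect
}
```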
@@ -758,6 +758,21 @@ protected:
  */
 class MTS_EXPORT_CORE LocalWorker : public Worker {
 public:
+    /**
+     * \brief Create a new local worker thread
+     *
+     * \param coreID
+     *     When a CPU core ID (>=0) is specified here, the worker
+     *     thread will attempt to register core affinity with the
+     *     operating system. Passing -1 disables this.
+     *
+     * \param name
+     *     An identifying string for this thread
+     *
+     * \param priority
+     *     The desired thread priority (not supported on some
+     *     operating systems)
+     */
     LocalWorker(int coreID, const std::string &name,
         Thread::EThreadPriority priority = Thread::ENormalPriority);
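For context, a short usage sketch of the constructor documented above, following the registration pattern used elsewhere in this commit. The header paths and the final Scheduler call are assumptions based on the hunks below rather than a verbatim excerpt:

```cpp
// Sketch: pin workers to cores only when exactly one worker per available
// core is created; otherwise pass -1 so the worker skips core affinity.
#include <mitsuba/core/sched.h>   // assumed header for Scheduler/LocalWorker
#include <mitsuba/core/util.h>    // assumed header for getCoreCount()/formatString()

using namespace mitsuba;

static void startLocalWorkers(int workerCount) {
    Scheduler *scheduler = Scheduler::getInstance();
    bool useCoreAffinity = workerCount == getCoreCount();
    for (int i = 0; i < workerCount; ++i)
        scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
            formatString("wrk%i", i)));
    scheduler->start();   // assumed: the scheduler is started after registration
}
```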
@@ -633,6 +633,7 @@ void Worker::start(Scheduler *scheduler, int workerIndex, int coreOffset) {
 
 LocalWorker::LocalWorker(int coreID, const std::string &name,
     Thread::EThreadPriority priority) : Worker(name) {
+    if (coreID >= 0)
         setCoreAffinity(coreID);
     m_coreCount = 1;
 #if !defined(__LINUX__)
@@ -324,24 +324,51 @@ void Thread::setCoreAffinity(int coreID) {
     if (getenv("VALGRIND_OPTS") != NULL)
         return;
 
-    int nCores = getCoreCount();
-    cpu_set_t *cpuset = CPU_ALLOC(nCores);
-    if (cpuset == NULL)
-        Log(EError, "Thread::setCoreAffinity(): could not allocate cpu_set_t");
-
+    int nCores = sysconf(_SC_NPROCESSORS_CONF);
+    size_t size = CPU_ALLOC_SIZE(nCores);
+    cpu_set_t *cpuset = CPU_ALLOC(nCores);
+    CPU_ZERO_S(size, cpuset);
-    if (coreID != -1 && coreID < nCores) {
-        CPU_SET_S(coreID, size, cpuset);
-    } else {
-        for (int i=0; i<nCores; ++i)
-            CPU_SET_S(i, size, cpuset);
+    if (cpuset == NULL) {
+        Log(EWarn, "Thread::setCoreAffinity(): could not allocate cpu_set_t");
+        return;
+    }
 
     const pthread_t threadID = d->thread.native_handle();
-    int retval = pthread_setaffinity_np(threadID, size, cpuset);
-    if (retval)
-        Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed: %s", strerror(errno));
+    int retval = pthread_getaffinity_np(threadID, size, cpuset);
+    if (retval) {
+        Log(EWarn, "Thread::setCoreAffinity(): pthread_getaffinity_np(): could "
+            "not read thread affinity map: %s", strerror(retval));
+        CPU_FREE(cpuset);
+        return;
+    }
+
+    int actualCoreID = -1, available = 0;
+    for (int i=0; i<nCores; ++i) {
+        if (!CPU_ISSET_S(i, size, cpuset))
+            continue;
+        if (available++ == coreID) {
+            actualCoreID = i;
+            break;
+        }
+    }
+
+    if (actualCoreID == -1) {
+        Log(EWarn, "Thread::setCoreAffinity(): out of bounds: %i/%i cores available, requested #%i!",
+            available, nCores, coreID);
+        CPU_FREE(cpuset);
+        return;
+    }
+
+    CPU_ZERO_S(size, cpuset);
+    CPU_SET_S(actualCoreID, size, cpuset);
+
+    retval = pthread_setaffinity_np(threadID, size, cpuset);
+    if (retval) {
+        Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed: %s", strerror(retval));
+        CPU_FREE(cpuset);
+        return;
+    }
+
     CPU_FREE(cpuset);
 #elif defined(__WINDOWS__)
     int nCores = getCoreCount();
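The rewritten Linux branch above no longer treats coreID as an absolute CPU number; it reads the thread's current affinity mask and pins to the coreID-th core that is actually permitted. A standalone sketch of just that remapping step (illustration, not the patched function):

```cpp
// Illustration only: map a logical worker index to the index-th CPU that the
// calling thread is allowed to run on, returning -1 when out of bounds.
// g++ defines _GNU_SOURCE by default, which the CPU_*_S macros require.
#include <cstdio>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>

int mapToAvailableCore(int coreID) {
    int nCores = (int) sysconf(_SC_NPROCESSORS_CONF);
    size_t size = CPU_ALLOC_SIZE(nCores);
    cpu_set_t *cpuset = CPU_ALLOC(nCores);
    if (cpuset == NULL)
        return -1;
    CPU_ZERO_S(size, cpuset);

    int actualCoreID = -1;
    if (pthread_getaffinity_np(pthread_self(), size, cpuset) == 0) {
        int available = 0;
        for (int i = 0; i < nCores; ++i) {
            if (!CPU_ISSET_S(i, size, cpuset))
                continue;
            if (available++ == coreID) {   // the coreID-th permitted core
                actualCoreID = i;
                break;
            }
        }
    }
    CPU_FREE(cpuset);
    return actualCoreID;
}

int main() {
    std::printf("worker 0 -> CPU %d\n", mapToAvailableCore(0));
    return 0;
}
```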
@@ -143,19 +143,50 @@ void freeAligned(void *ptr) {
 #endif
 }
 
+static int __cached_core_count = 0;
+
 int getCoreCount() {
+    // assumes atomic word size memory access
+    if (__cached_core_count)
+        return __cached_core_count;
+
 #if defined(__WINDOWS__)
     SYSTEM_INFO sys_info;
     GetSystemInfo(&sys_info);
+    __cached_core_count = sys_info.dwNumberOfProcessors;
     return sys_info.dwNumberOfProcessors;
 #elif defined(__OSX__)
     int nprocs;
     size_t nprocsSize = sizeof(int);
     if (sysctlbyname("hw.activecpu", &nprocs, &nprocsSize, NULL, 0))
         SLog(EError, "Could not detect the number of processors!");
-    return (int) nprocs;
+    __cached_core_count = nprocs;
+    return nprocs;
 #else
-    return sysconf(_SC_NPROCESSORS_CONF);
+    /* Determine the number of present cores */
+    int nCores = sysconf(_SC_NPROCESSORS_CONF);
+
+    /* Some of the cores may not be available to the user
+       (e.g. on certain cluster nodes) -- determine the number
+       of actual available cores here. */
+    size_t size = CPU_ALLOC_SIZE(nCores);
+    cpu_set_t *cpuset = CPU_ALLOC(nCores);
+    CPU_ZERO_S(size, cpuset);
+    int retval = pthread_getaffinity_np(pthread_self(), size, cpuset);
+    if (retval) {
+        SLog(EWarn, "getCoreCount(): pthread_getaffinity_np(): could "
+            "not read thread affinity map: %s", strerror(retval));
+        __cached_core_count = nCores;
+        CPU_FREE(cpuset);
+        return nCores;
+    }
+
+    int availableCores = 0;
+    for (int i=0; i<nCores; ++i)
+        availableCores += CPU_ISSET_S(i, size, cpuset) ? 1 : 0;
+    CPU_FREE(cpuset);
+    __cached_core_count = availableCores;
+    return availableCores;
 #endif
 }
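The cache introduced above is guarded only by the "assumes atomic word size memory access" comment: concurrent first calls may both probe, but they store the same value. For reference, the same memoization written with a C++11 atomic, as an explicit version of that assumption (a sketch, not part of this commit):

```cpp
// Reference sketch only: publish the cached core count through std::atomic
// instead of relying on plain word-sized stores. A duplicate probe on the
// first concurrent calls is harmless because every probe returns the same value.
#include <atomic>
#include <unistd.h>

static int detectCoreCount() {            // stand-in for the platform probes above
    return (int) sysconf(_SC_NPROCESSORS_ONLN);
}

int getCoreCountCached() {
    static std::atomic<int> cached(0);
    int value = cached.load(std::memory_order_relaxed);
    if (value == 0) {
        value = detectCoreCount();
        cached.store(value, std::memory_order_relaxed);
    }
    return value;
}
```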
@@ -130,7 +130,8 @@ int mitsuba_app(int argc, char **argv) {
 
     try {
         /* Default settings */
-        int nprocs = getCoreCount(), numParallelScenes = 1;
+        int nprocs_avail = getCoreCount(), nprocs = nprocs_avail;
+        int numParallelScenes = 1;
         std::string nodeName = getHostName(),
             networkHosts = "", destFile="";
         bool quietMode = false, progressBars = true, skipExisting = false;
@@ -257,8 +258,10 @@ int mitsuba_app(int argc, char **argv) {
 
         /* Configure the scheduling subsystem */
         Scheduler *scheduler = Scheduler::getInstance();
+        bool useCoreAffinity = nprocs == nprocs_avail;
         for (int i=0; i<nprocs; ++i)
-            scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
+            scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
+                formatString("wrk%i", i)));
         std::vector<std::string> hosts = tokenize(networkHosts, ";");
 
         /* Establish network connections to nested servers */
@@ -141,7 +141,7 @@ int mtsutil(int argc, char **argv) {
 
     try {
         /* Default settings */
-        int nprocs = getCoreCount();
+        int nprocs_avail = getCoreCount(), nprocs = nprocs_avail;
         std::string nodeName = getHostName(),
             networkHosts = "", destFile="";
         bool quietMode = false;
@@ -233,8 +233,11 @@ int mtsutil(int argc, char **argv) {
 
         /* Configure the scheduling subsystem */
         Scheduler *scheduler = Scheduler::getInstance();
+        bool useCoreAffinity = nprocs == nprocs_avail;
         for (int i=0; i<nprocs; ++i)
-            scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
+            scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
+                formatString("wrk%i", i)));
+
         std::vector<std::string> hosts = tokenize(networkHosts, ";");
 
         /* Establish network connections to nested servers */
@@ -53,7 +53,7 @@
 
 extern bool create_symlinks();
 
-static int localWorkerCtr = 0, remoteWorkerCtr = 0;
+static int remoteWorkerCtr = 0;
 
 MainWindow::MainWindow(QWidget *parent) :
     QMainWindow(parent), ui(new Ui::MainWindow),
@@ -310,8 +310,10 @@ bool MainWindow::initWorkersProcessArgv() {
 
     m_workerPriority = (Thread::EThreadPriority)
         settings.value("workerPriority", (int) Thread::ELowPriority).toInt();
+    bool useCoreAffinity = localWorkerCount == getCoreCount();
     for (int i=0; i<localWorkerCount; ++i)
-        scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
+        scheduler->registerWorker(new LocalWorker(useCoreAffinity ? i : -1,
+            formatString("wrk%i", i), m_workerPriority));
 
     int networkConnections = 0;
     QList<QVariant> connectionData = settings.value("connections").toList();
@@ -359,7 +361,7 @@ bool MainWindow::initWorkersProcessArgv() {
         QMessageBox::warning(this, tr("Scheduler warning"),
             tr("There must be at least one worker thread -- forcing creation of one."),
             QMessageBox::Ok);
-        scheduler->registerWorker(new LocalWorker(0, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
+        scheduler->registerWorker(new LocalWorker(-1, formatString("wrk%i", 0), m_workerPriority));
     }
 
     for (int i=0; i<toBeLoaded.size(); ++i)
@@ -1355,17 +1357,24 @@ void MainWindow::on_actionSettings_triggered() {
     if (localWorkersChanged || m_connections != d.getConnections()) {
         ref<Scheduler> sched = Scheduler::getInstance();
         sched->pause();
-        while (d.getLocalWorkerCount() > (int) localWorkers.size()) {
-            LocalWorker *worker = new LocalWorker(localWorkerCtr, formatString("wrk%i", localWorkerCtr), m_workerPriority);
-            localWorkerCtr++;
-            sched->registerWorker(worker);
-            localWorkers.push_back(worker);
-        }
-        while (d.getLocalWorkerCount() < (int) localWorkers.size()) {
+
+        if (localWorkers.size() != d.getLocalWorkerCount()) {
+            /* Completely remove old workers so that CPU affinities can be reassigned */
+            while (!localWorkers.empty()) {
+                Worker *worker = localWorkers.back();
+                sched->unregisterWorker(worker);
+                localWorkers.pop_back();
+            }
+            int workerCount = std::max(1, d.getLocalWorkerCount());
+            bool useCoreAffinity = workerCount == getCoreCount();
+            for (int i=0; i<workerCount; ++i) {
+                LocalWorker *worker = new LocalWorker(useCoreAffinity ? i : -1,
+                    formatString("wrk%i", i), m_workerPriority);
+                sched->registerWorker(worker);
+                localWorkers.push_back(worker);
+            }
+        }
 
         QList<ServerConnection> removeList,
             &newConnections = d.getConnections();
         for (int i=0; i<m_connections.size(); ++i) {