diff --git a/include/mitsuba/core/util.h b/include/mitsuba/core/util.h index d17d9390..98b02281 100644 --- a/include/mitsuba/core/util.h +++ b/include/mitsuba/core/util.h @@ -104,12 +104,6 @@ extern MTS_EXPORT_CORE std::string formatString(const char *pFmt, ...); /// Base-2 logarithm extern MTS_EXPORT_CORE Float log2(Float value); -/// Base-2 logarithm (32-bit integer version) -extern MTS_EXPORT_CORE int log2i(uint32_t value); - -/// Base-2 logarithm (64-bit integer version) -extern MTS_EXPORT_CORE int log2i(uint64_t value); - /// Friendly modulo function (always positive) extern MTS_EXPORT_CORE int modulo(int a, int b); @@ -117,21 +111,23 @@ extern MTS_EXPORT_CORE int modulo(int a, int b); inline int floorToInt(Float value) { return (int) std::floor(value); } +/// Base-2 logarithm (32-bit integer version) +extern MTS_EXPORT_CORE int log2i(uint32_t value); -/// Check if an integer is a power of two (32 bit version) -inline bool isPowerOfTwo(uint32_t i) { - return (i & (i-1)) == 0; -} +/// Base-2 logarithm (64-bit integer version) +extern MTS_EXPORT_CORE int log2i(uint64_t value); + +/// Check if an integer is a power of two (unsigned 32 bit version) +inline bool isPowerOfTwo(uint32_t i) { return (i & (i-1)) == 0; } /// Check if an integer is a power of two (signed 32 bit version) -inline bool isPowerOfTwo(int32_t i) { - return i > 0 && (i & (i-1)) == 0; -} +inline bool isPowerOfTwo(int32_t i) { return i > 0 && (i & (i-1)) == 0; } /// Check if an integer is a power of two (64 bit version) -inline bool isPowerOfTwo(uint64_t i) { - return (i & (i-1)) == 0; -} +inline bool isPowerOfTwo(uint64_t i) { return (i & (i-1)) == 0; } + +/// Check if an integer is a power of two (signed 64 bit version) +inline bool isPowerOfTwo(int64_t i) { return i > 0 && (i & (i-1)) == 0; } /// Round an integer to the next power of two extern MTS_EXPORT_CORE uint32_t roundToPowerOfTwo(uint32_t i); diff --git a/include/mitsuba/hw/nsglsession.h b/include/mitsuba/hw/nsglsession.h index 
018567fe..3f9f6ad5 100644 --- a/include/mitsuba/hw/nsglsession.h +++ b/include/mitsuba/hw/nsglsession.h @@ -36,6 +36,9 @@ public: /// Shut the session down void shutdown(); + /// Process all events and call event callbacks + void processEvents(); + /** * \brief Process all events and call event callbacks. * diff --git a/src/librender/noise.cpp b/src/librender/noise.cpp index 49203bfb..84c5fef1 100644 --- a/src/librender/noise.cpp +++ b/src/librender/noise.cpp @@ -43,19 +43,22 @@ static int NoisePerm[2 * NOISE_PERM_SIZE] = { inline static Float grad(int x, int y, int z, Float dx, Float dy, Float dz) { int h = NoisePerm[NoisePerm[NoisePerm[x]+y]+z]; h &= 15; -#if defined(GRAD_PBRT) - Float u = h<8 || h==12 || h==13 ? dx : dy; - Float v = h<4 || h==12 || h==13 ? dy : dz; -#elif defined(GRAD_PERLIN) +#if defined(GRAD_PERLIN) + /* Based on Ken Perlin's improved Noise reference implementation */ Float u = h<8 ? dx : dy; Float v = h<4 ? dy : h==12 || h==14 ? dx : dz; +#elif defined(GRAD_PBRT) + /* PBRT's implementation uses the hashes somewhat + differently. Possibly, this is just a typo */ + Float u = h<8 || h==12 || h==13 ? dx : dy; + Float v = h<4 || h==12 || h==13 ? dy : dz; #endif return ((h&1) ? -u : u) + ((h&2) ? 
-v : v); } inline static Float noiseWeight(Float t) { - Float t3 = t*t*t, t4 = t3*t; - return 6.0f*t4*t - 15.0f*t4 + 10.0f*t3; + Float t3 = t*t*t, t4 = t3*t, t5 = t4*t; + return 6.0f*t5 - 15.0f*t4 + 10.0f*t3; } Float Noise::perlinNoise(const Point &p) { diff --git a/src/luminaires/spot.cpp b/src/luminaires/spot.cpp index 993dec91..eb475de2 100644 --- a/src/luminaires/spot.cpp +++ b/src/luminaires/spot.cpp @@ -137,7 +137,7 @@ public: Spectrum sampleEmissionDirection(EmissionRecord &eRec, const Point2 &sample) const { m_luminaireToWorld(squareToCone(m_cosCutoffAngle, sample), eRec.d); eRec.pdfDir = squareToConePdf(m_cosCutoffAngle); - return Spectrum(falloffCurve(eRec.d, true)); + return falloffCurve(eRec.d, true); } void pdfEmission(EmissionRecord &eRec, bool delta) const { diff --git a/src/volume/SConscript b/src/volume/SConscript index ab3df12e..438c2c26 100644 --- a/src/volume/SConscript +++ b/src/volume/SConscript @@ -3,5 +3,6 @@ Import('env', 'plugins') plugins += env.SharedLibrary('#plugins/constvolume', ['constvolume.cpp']) plugins += env.SharedLibrary('#plugins/gridvolume', ['gridvolume.cpp']) plugins += env.SharedLibrary('#plugins/hgridvolume', ['hgridvolume.cpp']) +plugins += env.SharedLibrary('#plugins/volcache', ['volcache.cpp']) Export('plugins') diff --git a/src/volume/volcache.cpp b/src/volume/volcache.cpp new file mode 100644 index 00000000..cb0b82a1 --- /dev/null +++ b/src/volume/volcache.cpp @@ -0,0 +1,308 @@ +/* + This file is part of Mitsuba, a physically based rendering system. + + Copyright (c) 2007-2010 by Wenzel Jakob and others. + + Mitsuba is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License Version 3 + as published by the Free Software Foundation. + + Mitsuba is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include +#include +#include +#include +#include +#include + +MTS_NAMESPACE_BEGIN + +static StatsCounter statsHitRate("Volume cache", "Cache hit rate", EPercentage); +static StatsCounter statsCreate("Volume cache", "Block creations"); +static StatsCounter statsDestruct("Volume cache", "Block destructions"); +static StatsCounter statsEmpty("Volume cache", "Empty blocks", EPercentage); + +/* Lexicographic ordering for Vector3i */ +struct Vector3iKeyOrder : public std::binary_function { + inline bool operator()(const Vector3i &v1, const Vector3i &v2) const { + if (v1.x < v2.x) return true; + else if (v1.x > v2.x) return false; + if (v1.y < v2.y) return true; + else if (v1.y > v2.y) return false; + if (v1.z < v2.z) return true; + else if (v1.z > v2.z) return false; + return false; + } +}; + +/** + * This class sits in between the renderer and another data source, for which it + * caches all data lookups using an LRU scheme. This is useful if the nested + * volume data source is expensive to evaluate. + */ +class CachingDataSource : public VolumeDataSource { +public: + typedef LRUCache BlockCache; + + CachingDataSource(const Properties &props) + : VolumeDataSource(props) { + /// Size of an individual block (must be a power of 2) + m_blockSize = props.getInteger("blockSize", 4); + + if (!isPowerOfTwo(m_blockSize)) + Log(EError, "Block size must be a power of two!"); + + /* Width of an individual voxel. Will use the step size of the + nested medium by default */ + m_voxelWidth = props.getFloat("voxelWidth", -1); + + /* Permissible memory usage in MiB. 
Default: 32 MiB */ + m_memoryLimit = (size_t) props.getLong("memoryLimit", 32) * 1024 * 1024; + + m_stepSizeMultiplier = (Float) props.getFloat("stepSizeMultiplier", 1.0f); + + m_volumeToWorld = props.getTransform("toWorld", Transform()); + } + + CachingDataSource(Stream *stream, InstanceManager *manager) + : VolumeDataSource(stream, manager) { + m_nested = static_cast(manager->getInstance(stream)); + configure(); + } + + virtual ~CachingDataSource() { + } + + void serialize(Stream *stream, InstanceManager *manager) const { + VolumeDataSource::serialize(stream, manager); + manager->serialize(stream, m_nested.get()); + } + + void configure() { + if (m_nested == NULL) + Log(EError, "A nested volume data source is needed!"); + m_aabb = m_nested->getAABB(); + if (m_voxelWidth == -1) + m_voxelWidth = m_nested->getStepSize(); + + size_t memoryLimitPerCore = m_memoryLimit + / std::max((size_t) 1, Scheduler::getInstance()->getLocalWorkerCount()); + + Vector totalCells = m_aabb.getExtents() / m_voxelWidth; + for (int i=0; i<3; ++i) + m_cellCount[i] = (int) std::ceil(totalCells[i]); + + if (m_nested->supportsFloatLookups()) + m_channels = 1; + else if (m_nested->supportsVectorLookups()) + m_channels = 1; + else if (m_nested->supportsSpectrumLookups()) + m_channels = SPECTRUM_SAMPLES; + else + Log(EError, "Nested volume offers no access methods!"); + + m_blockRes = m_blockSize+1; + int blockMemoryUsage = (int) std::pow((Float) m_blockRes, 3) * m_channels * sizeof(float); + m_blocksPerCore = memoryLimitPerCore / blockMemoryUsage; + + m_worldToVolume = m_volumeToWorld.inverse(); + m_worldToGrid = Transform::scale(Vector(1/m_voxelWidth)) + * Transform::translate(-Vector(m_aabb.min)) * m_worldToVolume; + m_voxelMask = m_blockSize-1; + m_blockMask = ~(m_blockSize-1); + m_blockShift = log2i((uint32_t) m_blockSize); + + Log(EInfo, "Volume cache configuration"); + Log(EInfo, " Block size in voxels = %i", m_blockSize); + Log(EInfo, " Voxel width = %f", m_voxelWidth); + Log(EInfo, " 
Memory usage of one block = %s", memString(blockMemoryUsage).c_str()); + Log(EInfo, " Memory limit = %s", memString(m_memoryLimit).c_str()); + Log(EInfo, " Memory limit per core = %s", memString(memoryLimitPerCore).c_str()); + Log(EInfo, " Max. blocks per core = %i", m_blocksPerCore); + Log(EInfo, " Effective resolution = %s", totalCells.toString().c_str()); + Log(EInfo, " Effective storage = %s", memString( + totalCells[0]*totalCells[1]*totalCells[2]*sizeof(float)*m_channels).c_str()); + } + + Float lookupFloat(const Point &_p) const { + const Point p = m_worldToGrid.transformAffine(_p); + int x = (int) p.x, y = (int) p.y, z = (int) p.z; + + if (EXPECT_NOT_TAKEN( + x < 0 || x >= m_cellCount.x || + y < 0 || y >= m_cellCount.y || + z < 0 || z >= m_cellCount.z)) + return 0.0f; + + BlockCache *cache = m_cache.get(); + if (EXPECT_NOT_TAKEN(cache == NULL)) { + cache = new BlockCache(m_blocksPerCore, + boost::bind(&CachingDataSource::renderBlock, this, _1), + boost::bind(&CachingDataSource::destroyBlock, this, _1)); + m_cache.set(cache); + } + +#if defined(VOLCACHE_DEBUG) + if (cache->isFull()) { + /* For debugging: when the cache is full, dump locations + of all cache records into an OBJ file and exit */ + std::vector keys; + cache->get_keys(std::back_inserter(keys)); + + std::ofstream os("keys.obj"); + os << "o Keys" << endl; + for (size_t i=0; iget(Vector3i( + (x & m_blockMask) >> m_blockShift, + (y & m_blockMask) >> m_blockShift, + (z & m_blockMask) >> m_blockShift), hit); + + statsHitRate.incrementBase(); + if (hit) + ++statsHitRate; + + if (blockData == NULL) + return 0.0f; + + const int x1 = x & m_voxelMask, y1 = y & m_voxelMask, z1 = z & m_voxelMask, + x2 = x1 + 1, y2 = y1 + 1, z2 = z1 + 1; + + const Float fx = p.x - x, fy = p.y - y, fz = p.z - z, + _fx = 1.0f - fx, _fy = 1.0f - fy, _fz = 1.0f - fz; + + const float + &d000 = blockData[(z1*m_blockRes + y1)*m_blockRes + x1], + &d001 = blockData[(z1*m_blockRes + y1)*m_blockRes + x2], + &d010 = 
blockData[(z1*m_blockRes + y2)*m_blockRes + x1], + &d011 = blockData[(z1*m_blockRes + y2)*m_blockRes + x2], + &d100 = blockData[(z2*m_blockRes + y1)*m_blockRes + x1], + &d101 = blockData[(z2*m_blockRes + y1)*m_blockRes + x2], + &d110 = blockData[(z2*m_blockRes + y2)*m_blockRes + x1], + &d111 = blockData[(z2*m_blockRes + y2)*m_blockRes + x2]; + + float result = ((d000*_fx + d001*fx)*_fy + + (d010*_fx + d011*fx)*fy)*_fz + + ((d100*_fx + d101*fx)*_fy + + (d110*_fx + d111*fx)*fy)*fz; + + return result; + } + + Spectrum lookupSpectrum(const Point &_p) const { + return Spectrum(0.0f); + } + + Vector lookupVector(const Point &_p) const { + return Vector(0.0f); + } + + bool supportsFloatLookups() const { + return m_nested->supportsFloatLookups(); + } + + bool supportsSpectrumLookups() const { + return m_nested->supportsSpectrumLookups(); + } + + bool supportsVectorLookups() const { + return m_nested->supportsVectorLookups(); + } + + Float getStepSize() const { + return m_voxelWidth * m_stepSizeMultiplier; + } + + void addChild(const std::string &name, ConfigurableObject *child) { + if (child->getClass()->derivesFrom(VolumeDataSource::m_theClass)) { + Assert(m_nested == NULL); + m_nested = static_cast(child); + } else { + VolumeDataSource::addChild(name, child); + } + } + + float *renderBlock(const Vector3i &blockIdx) const { + float *result = new float[m_blockRes*m_blockRes*m_blockRes]; + Point offset = m_aabb.min + Vector( + blockIdx.x * m_blockSize * m_voxelWidth, + blockIdx.y * m_blockSize * m_voxelWidth, + blockIdx.z * m_blockSize * m_voxelWidth); + + int idx = 0; + bool nonempty = false; + for (int z = 0; zlookupFloat(p); + result[idx++] = value; + nonempty |= (value != 0); + } + } + } + + ++statsCreate; + statsEmpty.incrementBase(); + + if (nonempty) { + return result; + } else { + ++statsEmpty; + delete[] result; + return NULL; + } + } + + void destroyBlock(float *ptr) const { + ++statsDestruct; + delete[] ptr; + } + + MTS_DECLARE_CLASS() +protected: + ref m_nested; 
+ Transform m_volumeToWorld; + Transform m_worldToVolume; + Transform m_worldToGrid; + Float m_voxelWidth; + Float m_stepSizeMultiplier; + size_t m_memoryLimit; + size_t m_blocksPerCore; + int m_channels; + int m_blockSize, m_blockRes; + int m_blockMask, m_voxelMask, m_blockShift; + Vector3i m_cellCount; + mutable ThreadLocal m_cache; +}; + +MTS_IMPLEMENT_CLASS_S(CachingDataSource, false, VolumeDataSource); +MTS_EXPORT_PLUGIN(CachingDataSource, "Caching data source"); +MTS_NAMESPACE_END