From 257526f6b27c7ac13294efc8d8b62c43c52d224c Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Fri, 25 Oct 2013 22:21:53 +0200 Subject: [PATCH] Fast FFT-based image convolution support for large kernels --- build/SConscript.configure | 11 +++ build/config-linux-gcc-debug.py | 1 + build/config-linux-gcc.py | 1 + build/config-linux-icl.py | 1 + include/mitsuba/core/bitmap.h | 18 ++++ include/mitsuba/core/util.h | 19 +++- src/libcore/SConscript | 6 ++ src/libcore/bitmap.cpp | 156 +++++++++++++++++++++++++++++++- src/libpython/core.cpp | 2 + src/mtsgui/aboutdlg.cpp | 4 + 10 files changed, 216 insertions(+), 3 deletions(-) diff --git a/build/SConscript.configure b/build/SConscript.configure index 9d4c1517..17e0dd1b 100644 --- a/build/SConscript.configure +++ b/build/SConscript.configure @@ -71,6 +71,9 @@ vars.Add('JPEGLIBDIR', 'libjpeg library path') vars.Add('COLLADAINCLUDE', 'COLLADA DOM include path') vars.Add('COLLADALIB', 'COLLADA DOM libraries') vars.Add('COLLADALIBDIR', 'COLLADA DOM library path') +vars.Add('FFTWINCLUDE', 'FFTW include path') +vars.Add('FFTWLIB', 'FFTW libraries') +vars.Add('FFTWLIBDIR', 'FFTW library path') vars.Add('SHLIBPREFIX', 'Prefix for shared libraries') vars.Add('SHLIBSUFFIX', 'Suffix for shared libraries') vars.Add('LIBPREFIX', 'Prefix for windows library files') @@ -163,6 +166,10 @@ if env.has_key('COLLADAINCLUDE'): env.Prepend(CPPPATH=env['COLLADAINCLUDE']) if env.has_key('COLLADALIBDIR'): env.Prepend(LIBPATH=env['COLLADALIBDIR']) +if env.has_key('FFTWINCLUDE'): + env.Prepend(CPPPATH=env['FFTWINCLUDE']) +if env.has_key('FFTWLIBDIR'): + env.Prepend(LIBPATH=env['FFTWLIBDIR']) if not conf.CheckCXX(): print 'Could not compile a simple C++ fragment, verify that ' + \ @@ -216,6 +223,10 @@ if not conf.TryCompile("#include \n#if BOOST_VERSION < 104400 if not conf.CheckCXXHeader('Eigen/Core'): print 'Eigen 3.x is missing (install libeigen3-dev)!' Exit(1) +if not conf.CheckCXXHeader('fftw3.h'): + print 'FFTW3 not found (install for fast image convolution support)' +else: + env.Append(CPPDEFINES = [['MTS_HAS_FFTW', 1]] ) if sys.platform == 'win32': if not (conf.CheckCHeader(['windows.h', 'GL/gl.h']) \ and conf.CheckCHeader(['windows.h', 'GL/glu.h']) \ diff --git a/build/config-linux-gcc-debug.py b/build/config-linux-gcc-debug.py index cd6ce15d..f9bc71ac 100644 --- a/build/config-linux-gcc-debug.py +++ b/build/config-linux-gcc-debug.py @@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADALIB = ['collada14dom', 'xml2'] +FFTWLIB = ['fftw3'] # The following assumes that the Mitsuba bindings should be built for the # "default" Python version. It is also possible to build bindings for multiple diff --git a/build/config-linux-gcc.py b/build/config-linux-gcc.py index 0567f27c..5630597c 100644 --- a/build/config-linux-gcc.py +++ b/build/config-linux-gcc.py @@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADALIB = ['collada14dom', 'xml2'] +FFTWLIB = ['fftw3'] # The following assumes that the Mitsuba bindings should be built for the # "default" Python version. It is also possible to build bindings for multiple diff --git a/build/config-linux-icl.py b/build/config-linux-icl.py index f1b12fc2..b93b7377 100644 --- a/build/config-linux-icl.py +++ b/build/config-linux-icl.py @@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADALIB = ['collada14dom', 'xml2'] +FFTWLIB = ['fftw3'] # The following assumes that the Mitsuba bindings should be built for the # "default" Python version. It is also possible to build bindings for multiple diff --git a/include/mitsuba/core/bitmap.h b/include/mitsuba/core/bitmap.h index 6a9fc20e..81ce9135 100644 --- a/include/mitsuba/core/bitmap.h +++ b/include/mitsuba/core/bitmap.h @@ -784,6 +784,24 @@ public: accumulate(bitmap, Point2i(0), targetOffset, bitmap->getSize()); } + /** + * \brief Convolve the image with a (centered) convolution kernel + * + * When compiled with FFTW, Mitsuba will do the convolution + * in frequency space using three FFT operations. Otherwise, + * it falls back to a brute force method with quadratic + * complexity. + * + * The image can have any resolution; the kernel should be + * square and of odd resolution. Both images must be \ref EFloat32 + * or \ref EFloat64 valued and of the same pixel format. Each + * channel is processed separately. + * + * The convolution is always performed in double precision. + * irrespective of the precision of the underlying data. + */ + void convolve(const Bitmap *kernel); + /** * \brief Accumulate the contents of another bitmap into the * region of the specified offset diff --git a/include/mitsuba/core/util.h b/include/mitsuba/core/util.h index 147eb51e..c1222b6b 100644 --- a/include/mitsuba/core/util.h +++ b/include/mitsuba/core/util.h @@ -274,11 +274,26 @@ extern MTS_EXPORT_CORE Float hypot2(Float a, Float b); extern MTS_EXPORT_CORE Float log2(Float value); /// Always-positive modulo function (assumes b > 0) -inline int modulo(int a, int b) { - int r = a % b; +inline int32_t modulo(int32_t a, int32_t b) { + int32_t r = a % b; return (r < 0) ? r+b : r; } +/// Always-positive modulo function (assumes b > 0) +inline int64_t modulo(int64_t a, int64_t b) { + int64_t r = a % b; + return (r < 0) ? r+b : r; +} + +#if defined(MTS_AMBIGUOUS_SIZE_T) +inline ssize_t modulo(ssize_t a, ssize_t b) { + if (sizeof(ssize_t) == 8) + return modulo((int64_t) a, (int64_t) b); + else + return modulo((int32_t) a, (int32_t) b); +} +#endif + /// Always-positive modulo function, float version (assumes b > 0) inline Float modulo(Float a, Float b) { Float r = std::fmod(a, b); diff --git a/src/libcore/SConscript b/src/libcore/SConscript index 57e69154..7559d368 100644 --- a/src/libcore/SConscript +++ b/src/libcore/SConscript @@ -22,6 +22,12 @@ if coreEnv.has_key('JPEGINCLUDE'): coreEnv.Prepend(CPPPATH=env['JPEGINCLUDE']) if coreEnv.has_key('JPEGLIB'): coreEnv.Prepend(LIBS=env['JPEGLIB']) +if coreEnv.has_key('FFTWLIBDIR'): + coreEnv.Prepend(LIBPATH=env['FFTWLIBDIR']) +if coreEnv.has_key('FFTWINCLUDE'): + coreEnv.Prepend(CPPPATH=env['FFTWINCLUDE']) +if coreEnv.has_key('FFTWLIB'): + coreEnv.Prepend(LIBS=env['FFTWLIB']) coreEnv.Prepend(CPPDEFINES = [['MTS_BUILD_MODULE', 'MTS_MODULE_CORE']]) diff --git a/src/libcore/bitmap.cpp b/src/libcore/bitmap.cpp index 6e5606d1..9849d15d 100644 --- a/src/libcore/bitmap.cpp +++ b/src/libcore/bitmap.cpp @@ -59,6 +59,11 @@ extern "C" { }; #endif +#if defined(MTS_HAS_FFTW) +#include +#include +#endif + MTS_NAMESPACE_BEGIN #if defined(MTS_HAS_OPENEXR) @@ -558,6 +563,156 @@ void Bitmap::accumulate(const Bitmap *bitmap, Point2i sourceOffset, } } +void Bitmap::convolve(const Bitmap *_kernel) { + if (_kernel->getWidth() != _kernel->getHeight()) + Log(EError, "Bitmap::convolve(): convolution kernel must be square!"); + if (_kernel->getWidth() % 2 != 1) + Log(EError, "Bitmap::convolve(): convolution kernel size must be odd!"); + if (_kernel->getChannelCount() != getChannelCount()) + Log(EError, "Bitmap::convolve(): kernel and bitmap have different channel counts!"); + if (_kernel->getPixelFormat() != getPixelFormat()) + Log(EError, "Bitmap::convolve(): kernel and bitmap have different pixel formats!"); + if (_kernel->getComponentFormat() != getComponentFormat()) + Log(EError, "Bitmap::convolve(): kernel and bitmap have different component formats!"); + if (m_componentFormat != EFloat32 && m_componentFormat != EFloat64) + Log(EError, "Bitmap::convolve(): unsupported component format! (must be float32/float64)"); + + size_t kernelSize = (size_t) _kernel->getWidth(), + hKernelSize = kernelSize / 2, + width = (size_t) m_size.x, + height = (size_t) m_size.y; + +#if defined(MTS_HAS_FFTW) + typedef std::complex complex; + + size_t paddedWidth = width + hKernelSize, + paddedHeight = height + hKernelSize, + paddedSize = paddedWidth*paddedHeight; + + complex *kernel = (complex *) fftw_malloc(sizeof(complex) * paddedSize), + *kernelS = (complex *) fftw_malloc(sizeof(complex) * paddedSize), + *data = (complex *) fftw_malloc(sizeof(complex) * paddedSize), + *dataS = (complex *) fftw_malloc(sizeof(complex) * paddedSize); + + if (!kernel || !kernelS || !data || !dataS) + SLog(EError, "Bitmap::convolve(): Unable to allocate temporary memory!"); + + /* Create a FFTW plan for a 2D DFT of this size */ + fftw_plan p = fftw_plan_dft_2d(paddedHeight, paddedWidth, + (fftw_complex *) kernel, (fftw_complex *) kernelS, FFTW_FORWARD, FFTW_ESTIMATE); + + memset(kernel, 0, sizeof(complex)*paddedSize); + + for (int ch=0; chgetFloat32Data()[(x+y*kernelSize)*m_channelCount+ch]; + } + } + + /* Copy and zero-pad the input data */ + for (size_t y=0; ygetFloat64Data()[(x+y*kernelSize)*m_channelCount+ch]; + } + } + + /* Copy and zero-pad the input data */ + for (size_t y=0; y(allocAligned(getBufferSize())); + for (int ch=0; chgetFloat32Data(); + + for (size_t y=0; y= 0 && ys >= 0 && xs < (ssize_t) width && ys < (ssize_t) height) + result += kernel[(kx+ky*kernelSize)*m_channelCount+ch] + * input[(xs+ys*width)*m_channelCount+ch]; + } + } + output[(x+y*width)*m_channelCount+ch] = (float) result; + } + } + } else { + const double *input = getFloat64Data(); + const double *kernel = _kernel->getFloat64Data(); + + for (size_t y=0; y= 0 && ys >= 0 && xs < (ssize_t) width && ys < (ssize_t) height) + result += kernel[(kx+ky*kernelSize)*m_channelCount+ch] + * input[(xs+ys*width)*m_channelCount+ch]; + } + } + output[(x+y*width)*m_channelCount+ch] = result; + } + } + } + } + if (m_ownsData) + freeAligned(m_data); + m_data = (uint8_t *) output; + m_ownsData = true; +#endif +} + void Bitmap::scale(Float value) { if (m_componentFormat == EBitmask) Log(EError, "Bitmap::scale(): bitmasks are not supported!"); @@ -801,7 +956,6 @@ ref Bitmap::arithmeticOperation(Bitmap::EArithmeticOperation operation, return output; } - void Bitmap::colorBalance(Float r, Float g, Float b) { if (m_pixelFormat != ERGB && m_pixelFormat != ERGBA) Log(EError, "colorBalance(): expected a RGB or RGBA image!"); diff --git a/src/libpython/core.cpp b/src/libpython/core.cpp index 1d596b72..e75f5be8 100644 --- a/src/libpython/core.cpp +++ b/src/libpython/core.cpp @@ -756,8 +756,10 @@ void export_core() { .def("crop", &Bitmap::crop) .def("applyMatrix", &bitmap_applyMatrix) .def("colorBalance", &Bitmap::colorBalance) + .def("convolve", &Bitmap::convolve) .def("accumulate", accumulate_1) .def("accumulate", accumulate_2) + .def("scale", &Bitmap::scale) .def("write", &bitmap_write) .def("setMetadataString", &Bitmap::setMetadataString) .def("getMetadataString", &Bitmap::getMetadataString, BP_RETURN_VALUE) diff --git a/src/mtsgui/aboutdlg.cpp b/src/mtsgui/aboutdlg.cpp index aa541777..8ac72edc 100644 --- a/src/mtsgui/aboutdlg.cpp +++ b/src/mtsgui/aboutdlg.cpp @@ -78,6 +78,10 @@ AboutDialog::AboutDialog(QWidget *parent) : configFlags += "MTS_HAS_BREAKPAD "; #endif +#if defined(MTS_HAS_FFTW) + configFlags += "MTS_HAS_FFTW "; +#endif + configFlags += formatString("SPECTRUM_SAMPLES=%i ", SPECTRUM_SAMPLES).c_str();