Fast FFT-based image convolution support for large kernels

metadata
Wenzel Jakob 2013-10-25 22:21:53 +02:00
parent dc4ea75259
commit 257526f6b2
10 changed files with 216 additions and 3 deletions

View File

@ -71,6 +71,9 @@ vars.Add('JPEGLIBDIR', 'libjpeg library path')
vars.Add('COLLADAINCLUDE', 'COLLADA DOM include path') vars.Add('COLLADAINCLUDE', 'COLLADA DOM include path')
vars.Add('COLLADALIB', 'COLLADA DOM libraries') vars.Add('COLLADALIB', 'COLLADA DOM libraries')
vars.Add('COLLADALIBDIR', 'COLLADA DOM library path') vars.Add('COLLADALIBDIR', 'COLLADA DOM library path')
vars.Add('FFTWINCLUDE', 'FFTW include path')
vars.Add('FFTWLIB', 'FFTW libraries')
vars.Add('FFTWLIBDIR', 'FFTW library path')
vars.Add('SHLIBPREFIX', 'Prefix for shared libraries') vars.Add('SHLIBPREFIX', 'Prefix for shared libraries')
vars.Add('SHLIBSUFFIX', 'Suffix for shared libraries') vars.Add('SHLIBSUFFIX', 'Suffix for shared libraries')
vars.Add('LIBPREFIX', 'Prefix for windows library files') vars.Add('LIBPREFIX', 'Prefix for windows library files')
@ -163,6 +166,10 @@ if env.has_key('COLLADAINCLUDE'):
env.Prepend(CPPPATH=env['COLLADAINCLUDE']) env.Prepend(CPPPATH=env['COLLADAINCLUDE'])
if env.has_key('COLLADALIBDIR'): if env.has_key('COLLADALIBDIR'):
env.Prepend(LIBPATH=env['COLLADALIBDIR']) env.Prepend(LIBPATH=env['COLLADALIBDIR'])
if env.has_key('FFTWINCLUDE'):
env.Prepend(CPPPATH=env['FFTWINCLUDE'])
if env.has_key('FFTWLIBDIR'):
env.Prepend(LIBPATH=env['FFTWLIBDIR'])
if not conf.CheckCXX(): if not conf.CheckCXX():
print 'Could not compile a simple C++ fragment, verify that ' + \ print 'Could not compile a simple C++ fragment, verify that ' + \
@ -216,6 +223,10 @@ if not conf.TryCompile("#include <boost/version.hpp>\n#if BOOST_VERSION < 104400
if not conf.CheckCXXHeader('Eigen/Core'): if not conf.CheckCXXHeader('Eigen/Core'):
print 'Eigen 3.x is missing (install libeigen3-dev)!' print 'Eigen 3.x is missing (install libeigen3-dev)!'
Exit(1) Exit(1)
if not conf.CheckCXXHeader('fftw3.h'):
print 'FFTW3 not found (install for fast image convolution support)'
else:
env.Append(CPPDEFINES = [['MTS_HAS_FFTW', 1]] )
if sys.platform == 'win32': if sys.platform == 'win32':
if not (conf.CheckCHeader(['windows.h', 'GL/gl.h']) \ if not (conf.CheckCHeader(['windows.h', 'GL/gl.h']) \
and conf.CheckCHeader(['windows.h', 'GL/glu.h']) \ and conf.CheckCHeader(['windows.h', 'GL/glu.h']) \

View File

@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX']
BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread']
COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4']
COLLADALIB = ['collada14dom', 'xml2'] COLLADALIB = ['collada14dom', 'xml2']
FFTWLIB = ['fftw3']
# The following assumes that the Mitsuba bindings should be built for the # The following assumes that the Mitsuba bindings should be built for the
# "default" Python version. It is also possible to build bindings for multiple # "default" Python version. It is also possible to build bindings for multiple

View File

@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX']
BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread']
COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4']
COLLADALIB = ['collada14dom', 'xml2'] COLLADALIB = ['collada14dom', 'xml2']
FFTWLIB = ['fftw3']
# The following assumes that the Mitsuba bindings should be built for the # The following assumes that the Mitsuba bindings should be built for the
# "default" Python version. It is also possible to build bindings for multiple # "default" Python version. It is also possible to build bindings for multiple

View File

@ -21,6 +21,7 @@ GLFLAGS = ['-DGLEW_MX']
BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread'] BOOSTLIB = ['boost_system', 'boost_filesystem', 'boost_thread']
COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4'] COLLADAINCLUDE = ['/usr/include/collada-dom', '/usr/include/collada-dom/1.4']
COLLADALIB = ['collada14dom', 'xml2'] COLLADALIB = ['collada14dom', 'xml2']
FFTWLIB = ['fftw3']
# The following assumes that the Mitsuba bindings should be built for the # The following assumes that the Mitsuba bindings should be built for the
# "default" Python version. It is also possible to build bindings for multiple # "default" Python version. It is also possible to build bindings for multiple

View File

@ -784,6 +784,24 @@ public:
accumulate(bitmap, Point2i(0), targetOffset, bitmap->getSize()); accumulate(bitmap, Point2i(0), targetOffset, bitmap->getSize());
} }
/**
* \brief Convolve the image with a (centered) convolution kernel
*
* When compiled with FFTW, Mitsuba will do the convolution
* in frequency space using three FFT operations. Otherwise,
* it falls back to a brute force method with quadratic
* complexity.
*
* The image can have any resolution; the kernel should be
* square and of odd resolution. Both images must be \ref EFloat32
* or \ref EFloat64 valued and of the same pixel format. Each
* channel is processed separately.
*
* The convolution is always performed in double precision.
* irrespective of the precision of the underlying data.
*/
void convolve(const Bitmap *kernel);
/** /**
* \brief Accumulate the contents of another bitmap into the * \brief Accumulate the contents of another bitmap into the
* region of the specified offset * region of the specified offset

View File

@ -274,11 +274,26 @@ extern MTS_EXPORT_CORE Float hypot2(Float a, Float b);
extern MTS_EXPORT_CORE Float log2(Float value); extern MTS_EXPORT_CORE Float log2(Float value);
/// Always-positive modulo function (assumes b > 0) /// Always-positive modulo function (assumes b > 0)
inline int modulo(int a, int b) { inline int32_t modulo(int32_t a, int32_t b) {
int r = a % b; int32_t r = a % b;
return (r < 0) ? r+b : r; return (r < 0) ? r+b : r;
} }
/// Always-positive modulo function (assumes b > 0)
inline int64_t modulo(int64_t a, int64_t b) {
int64_t r = a % b;
return (r < 0) ? r+b : r;
}
#if defined(MTS_AMBIGUOUS_SIZE_T)
inline ssize_t modulo(ssize_t a, ssize_t b) {
if (sizeof(ssize_t) == 8)
return modulo((int64_t) a, (int64_t) b);
else
return modulo((int32_t) a, (int32_t) b);
}
#endif
/// Always-positive modulo function, float version (assumes b > 0) /// Always-positive modulo function, float version (assumes b > 0)
inline Float modulo(Float a, Float b) { inline Float modulo(Float a, Float b) {
Float r = std::fmod(a, b); Float r = std::fmod(a, b);

View File

@ -22,6 +22,12 @@ if coreEnv.has_key('JPEGINCLUDE'):
coreEnv.Prepend(CPPPATH=env['JPEGINCLUDE']) coreEnv.Prepend(CPPPATH=env['JPEGINCLUDE'])
if coreEnv.has_key('JPEGLIB'): if coreEnv.has_key('JPEGLIB'):
coreEnv.Prepend(LIBS=env['JPEGLIB']) coreEnv.Prepend(LIBS=env['JPEGLIB'])
if coreEnv.has_key('FFTWLIBDIR'):
coreEnv.Prepend(LIBPATH=env['FFTWLIBDIR'])
if coreEnv.has_key('FFTWINCLUDE'):
coreEnv.Prepend(CPPPATH=env['FFTWINCLUDE'])
if coreEnv.has_key('FFTWLIB'):
coreEnv.Prepend(LIBS=env['FFTWLIB'])
coreEnv.Prepend(CPPDEFINES = [['MTS_BUILD_MODULE', 'MTS_MODULE_CORE']]) coreEnv.Prepend(CPPDEFINES = [['MTS_BUILD_MODULE', 'MTS_MODULE_CORE']])

View File

@ -59,6 +59,11 @@ extern "C" {
}; };
#endif #endif
#if defined(MTS_HAS_FFTW)
#include <complex>
#include <fftw3.h>
#endif
MTS_NAMESPACE_BEGIN MTS_NAMESPACE_BEGIN
#if defined(MTS_HAS_OPENEXR) #if defined(MTS_HAS_OPENEXR)
@ -558,6 +563,156 @@ void Bitmap::accumulate(const Bitmap *bitmap, Point2i sourceOffset,
} }
} }
void Bitmap::convolve(const Bitmap *_kernel) {
if (_kernel->getWidth() != _kernel->getHeight())
Log(EError, "Bitmap::convolve(): convolution kernel must be square!");
if (_kernel->getWidth() % 2 != 1)
Log(EError, "Bitmap::convolve(): convolution kernel size must be odd!");
if (_kernel->getChannelCount() != getChannelCount())
Log(EError, "Bitmap::convolve(): kernel and bitmap have different channel counts!");
if (_kernel->getPixelFormat() != getPixelFormat())
Log(EError, "Bitmap::convolve(): kernel and bitmap have different pixel formats!");
if (_kernel->getComponentFormat() != getComponentFormat())
Log(EError, "Bitmap::convolve(): kernel and bitmap have different component formats!");
if (m_componentFormat != EFloat32 && m_componentFormat != EFloat64)
Log(EError, "Bitmap::convolve(): unsupported component format! (must be float32/float64)");
size_t kernelSize = (size_t) _kernel->getWidth(),
hKernelSize = kernelSize / 2,
width = (size_t) m_size.x,
height = (size_t) m_size.y;
#if defined(MTS_HAS_FFTW)
typedef std::complex<double> complex;
size_t paddedWidth = width + hKernelSize,
paddedHeight = height + hKernelSize,
paddedSize = paddedWidth*paddedHeight;
complex *kernel = (complex *) fftw_malloc(sizeof(complex) * paddedSize),
*kernelS = (complex *) fftw_malloc(sizeof(complex) * paddedSize),
*data = (complex *) fftw_malloc(sizeof(complex) * paddedSize),
*dataS = (complex *) fftw_malloc(sizeof(complex) * paddedSize);
if (!kernel || !kernelS || !data || !dataS)
SLog(EError, "Bitmap::convolve(): Unable to allocate temporary memory!");
/* Create a FFTW plan for a 2D DFT of this size */
fftw_plan p = fftw_plan_dft_2d(paddedHeight, paddedWidth,
(fftw_complex *) kernel, (fftw_complex *) kernelS, FFTW_FORWARD, FFTW_ESTIMATE);
memset(kernel, 0, sizeof(complex)*paddedSize);
for (int ch=0; ch<m_channelCount; ++ch) {
memset(data, 0, sizeof(complex)*paddedSize);
if (m_componentFormat == EFloat32) {
/* Copy and zero-pad the convolution kernel in a wraparound fashion */
for (size_t y=0; y<kernelSize; ++y) {
ssize_t wrappedY = modulo(hKernelSize - (ssize_t) y, (ssize_t) paddedHeight);
for (size_t x=0; x<kernelSize; ++x) {
ssize_t wrappedX = modulo(hKernelSize - (ssize_t) x, (ssize_t) paddedWidth);
kernel[wrappedX+wrappedY*paddedWidth] = _kernel->getFloat32Data()[(x+y*kernelSize)*m_channelCount+ch];
}
}
/* Copy and zero-pad the input data */
for (size_t y=0; y<height; ++y)
for (size_t x=0; x<width; ++x)
data[x+y*paddedWidth] = getFloat32Data()[(x+y*width)*m_channelCount + ch];
} else {
/* Copy and zero-pad the convolution kernel in a wraparound fashion */
for (size_t y=0; y<kernelSize; ++y) {
ssize_t wrappedY = modulo(hKernelSize - (ssize_t) y, (ssize_t) paddedHeight);
for (size_t x=0; x<kernelSize; ++x) {
ssize_t wrappedX = modulo(hKernelSize - (ssize_t) x, (ssize_t) paddedWidth);
kernel[wrappedX+wrappedY*paddedWidth] = _kernel->getFloat64Data()[(x+y*kernelSize)*m_channelCount+ch];
}
}
/* Copy and zero-pad the input data */
for (size_t y=0; y<height; ++y)
for (size_t x=0; x<width; ++x)
data[x+y*paddedWidth] = getFloat64Data()[(x+y*width)*m_channelCount + ch];
}
/* FFT the kernel and data */
fftw_execute(p);
fftw_execute_dft(p, (fftw_complex *) data, (fftw_complex *) dataS);
/* Multiply in frequency space -- also conjugate and scale to use the computed FFT plan backwards */
double factor = (double) 1 / (paddedWidth*paddedHeight);
for (size_t i=0; i<paddedSize; ++i)
dataS[i] = factor * std::conj(dataS[i] * kernelS[i]);
/* "IFFT" */
fftw_execute_dft(p, (fftw_complex *) dataS, (fftw_complex *) data);
if (m_componentFormat == EFloat32) {
for (size_t y=0; y<height; ++y)
for (size_t x=0; x<width; ++x)
getFloat32Data()[(x+y*width)*m_channelCount+ch] = (float) std::real(data[x+y*paddedWidth]);
} else {
for (size_t y=0; y<height; ++y)
for (size_t x=0; x<width; ++x)
getFloat64Data()[(x+y*width)*m_channelCount+ch] = std::real(data[x+y*paddedWidth]);
}
}
fftw_destroy_plan(p);
fftw_free(kernel);
fftw_free(kernelS);
fftw_free(data);
fftw_free(dataS);
#else
/* Brute force fallback version */
float *output = static_cast<float *>(allocAligned(getBufferSize()));
for (int ch=0; ch<m_channelCount; ++ch) {
if (m_componentFormat == EFloat32) {
const float *input = getFloat32Data();
const float *kernel = _kernel->getFloat32Data();
for (size_t y=0; y<height; ++y) {
for (size_t x=0; x<width; ++x) {
double result = 0;
for (size_t ky=0; ky<kernelSize; ++ky) {
for (size_t kx=0; kx<kernelSize; ++kx) {
ssize_t xs = x + kx - (ssize_t) hKernelSize,
ys = y + ky - (ssize_t) hKernelSize;
if (xs >= 0 && ys >= 0 && xs < (ssize_t) width && ys < (ssize_t) height)
result += kernel[(kx+ky*kernelSize)*m_channelCount+ch]
* input[(xs+ys*width)*m_channelCount+ch];
}
}
output[(x+y*width)*m_channelCount+ch] = (float) result;
}
}
} else {
const double *input = getFloat64Data();
const double *kernel = _kernel->getFloat64Data();
for (size_t y=0; y<height; ++y) {
for (size_t x=0; x<width; ++x) {
double result = 0;
for (size_t ky=0; ky<kernelSize; ++ky) {
for (size_t kx=0; kx<kernelSize; ++kx) {
ssize_t xs = x + kx - (ssize_t) hKernelSize,
ys = y + ky - (ssize_t) hKernelSize;
if (xs >= 0 && ys >= 0 && xs < (ssize_t) width && ys < (ssize_t) height)
result += kernel[(kx+ky*kernelSize)*m_channelCount+ch]
* input[(xs+ys*width)*m_channelCount+ch];
}
}
output[(x+y*width)*m_channelCount+ch] = result;
}
}
}
}
if (m_ownsData)
freeAligned(m_data);
m_data = (uint8_t *) output;
m_ownsData = true;
#endif
}
void Bitmap::scale(Float value) { void Bitmap::scale(Float value) {
if (m_componentFormat == EBitmask) if (m_componentFormat == EBitmask)
Log(EError, "Bitmap::scale(): bitmasks are not supported!"); Log(EError, "Bitmap::scale(): bitmasks are not supported!");
@ -801,7 +956,6 @@ ref<Bitmap> Bitmap::arithmeticOperation(Bitmap::EArithmeticOperation operation,
return output; return output;
} }
void Bitmap::colorBalance(Float r, Float g, Float b) { void Bitmap::colorBalance(Float r, Float g, Float b) {
if (m_pixelFormat != ERGB && m_pixelFormat != ERGBA) if (m_pixelFormat != ERGB && m_pixelFormat != ERGBA)
Log(EError, "colorBalance(): expected a RGB or RGBA image!"); Log(EError, "colorBalance(): expected a RGB or RGBA image!");

View File

@ -756,8 +756,10 @@ void export_core() {
.def("crop", &Bitmap::crop) .def("crop", &Bitmap::crop)
.def("applyMatrix", &bitmap_applyMatrix) .def("applyMatrix", &bitmap_applyMatrix)
.def("colorBalance", &Bitmap::colorBalance) .def("colorBalance", &Bitmap::colorBalance)
.def("convolve", &Bitmap::convolve)
.def("accumulate", accumulate_1) .def("accumulate", accumulate_1)
.def("accumulate", accumulate_2) .def("accumulate", accumulate_2)
.def("scale", &Bitmap::scale)
.def("write", &bitmap_write) .def("write", &bitmap_write)
.def("setMetadataString", &Bitmap::setMetadataString) .def("setMetadataString", &Bitmap::setMetadataString)
.def("getMetadataString", &Bitmap::getMetadataString, BP_RETURN_VALUE) .def("getMetadataString", &Bitmap::getMetadataString, BP_RETURN_VALUE)

View File

@ -78,6 +78,10 @@ AboutDialog::AboutDialog(QWidget *parent) :
configFlags += "MTS_HAS_BREAKPAD "; configFlags += "MTS_HAS_BREAKPAD ";
#endif #endif
#if defined(MTS_HAS_FFTW)
configFlags += "MTS_HAS_FFTW ";
#endif
configFlags += formatString("SPECTRUM_SAMPLES=%i ", configFlags += formatString("SPECTRUM_SAMPLES=%i ",
SPECTRUM_SAMPLES).c_str(); SPECTRUM_SAMPLES).c_str();