From 06b0562154c130ed56533adf5dc51115e6f2594e Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Mon, 22 Oct 2012 19:30:47 -0400
Subject: [PATCH 01/43] new branch to implement displacements based on work by
 Milos. Step 1: heightfields!

---
 src/shapes/SConscript      |   1 +
 src/shapes/heightfield.cpp | 371 +++++++++++++++++++++++++++++++++++++
 2 files changed, 372 insertions(+)
 create mode 100644 src/shapes/heightfield.cpp

diff --git a/src/shapes/SConscript b/src/shapes/SConscript
index 6abc53ce..4c48f497 100644
--- a/src/shapes/SConscript
+++ b/src/shapes/SConscript
@@ -12,5 +12,6 @@ plugins += env.SharedLibrary('hair', ['hair.cpp'])
 plugins += env.SharedLibrary('shapegroup', ['shapegroup.cpp'])
 plugins += env.SharedLibrary('instance', ['instance.cpp'])
 plugins += env.SharedLibrary('animatedinstance', ['animatedinstance.cpp'])
+plugins += env.SharedLibrary('heightfield', ['heightfield.cpp'])
 
 Export('plugins')
diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
new file mode 100644
index 00000000..5d91aa92
--- /dev/null
+++ b/src/shapes/heightfield.cpp
@@ -0,0 +1,371 @@
+/*
+    This file is part of Mitsuba, a physically based rendering system.
+
+    Copyright (c) 2007-2011 by Wenzel Jakob and others.
+
+    Mitsuba is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License Version 3
+    as published by the Free Software Foundation.
+
+    Mitsuba is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <mitsuba/render/shape.h>
+#include <mitsuba/render/bsdf.h>
+#include <mitsuba/render/trimesh.h>
+#include <mitsuba/core/properties.h>
+#include <mitsuba/core/bitmap.h>
+#include <mitsuba/core/fstream.h>
+
+MTS_NAMESPACE_BEGIN
+
+/*!\plugin{heightfield}{Height field}
+ * \order{12}
+ *
+ * Developed by Milo\^{s} Ha\^{s}an
+ */
+class Heightfield : public Shape {
+private:
+    inline Float getIJ(int i, int j) const {
+        i = std::max(0, std::min(m_rows-1, i));
+        j = std::max(0, std::min(m_cols-1, j));
+        return m_data[i*m_cols + j];
+    }
+
+    inline Normal getNormalIJ(int i, int j) const {
+        i = std::max(0, std::min(m_rows-1, i));
+        j = std::max(0, std::min(m_cols-1, j));
+        return m_normals[i*m_cols + j];
+    }
+
+    inline Float getXY(int x, int y) const {
+        return getIJ(m_rows - y - 1, x);
+    }
+
+    inline Normal getNormalXY(int x, int y) const {
+        return getNormalIJ(m_rows - y - 1, x);
+    }
+
+    inline Float getBilinear(Float x, Float y) const {
+        // cout << "getBilinear: " << x << ' ' << y << ' ';
+        int xi = (int) std::floor(x);
+        int yi = (int) std::floor(y);
+        Float u = x - xi, v = y - yi;
+        Float z00 = getXY(xi, yi);
+        Float z01 = getXY(xi, yi+1);
+        Float z10 = getXY(xi+1, yi);
+        Float z11 = getXY(xi+1, yi+1);
+        Float result = (1-u)*(1-v)*z00 + (1-u)*v*z01 +  u*(1-v)*z10 +  u*v*z11;
+        // cout << result << '\n';
+        return result;
+    }
+
+    inline Vector2 getBilinearGradient(Float x, Float y) const {
+        int xi = (int) std::floor(x);
+        int yi = (int) std::floor(y);
+        Float u = x - xi, v = y - yi;
+        Float z00 = getXY(xi, yi);
+        Float z01 = getXY(xi, yi+1);
+        Float z10 = getXY(xi+1, yi);
+        Float z11 = getXY(xi+1, yi+1);
+        Float du = (1-v) * (z10 - z00) + v * (z11 - z01);
+        Float dv = (1-u) * (z01 - z00) + u * (z11 - z10);
+        return Vector2(du, dv);
+    }
+
+    inline Normal getInterpolatedNormal(Float x, Float y) const {
+        int xi = (int) std::floor(x);
+        int yi = (int) std::floor(y);
+        Float u = x - xi, v = y - yi;
+        Normal z00 = getNormalXY(xi, yi);
+        Normal z01 = getNormalXY(xi, yi+1);
+        Normal z10 = getNormalXY(xi+1, yi);
+        Normal z11 = getNormalXY(xi+1, yi+1);
+        Normal result = (1-u)*(1-v)*z00 + (1-u)*v*z01 +  u*(1-v)*z10 +  u*v*z11;
+        return normalize(result);
+    }
+
+public:
+	Heightfield(const Properties &props) : Shape(props) {
+        m_pixelSize = props.getFloat("pixelSize", 0.01f);
+        Float multiplier = props.getFloat("multiplier", 1);
+        m_flipNormal = props.getBoolean("flipNormal", false);
+
+        // read bitmap; use only red channel
+        fs::path hfPath = props.getString("hf");
+		ref<FileStream> stream = new FileStream(hfPath);
+        ref<Bitmap> hf = new Bitmap(Bitmap::EOpenEXR, stream);
+        m_rows = hf->getHeight();
+        m_cols = hf->getWidth();
+        Vector4* hfData = (Vector4*) hf->getFloatData();
+
+        m_data = new float[m_rows * m_cols];
+        for (int i = 0; i < m_rows * m_cols; i++)
+            m_data[i] = hfData[i].x * multiplier;
+
+        // compute bounding box
+        Float xMax = (m_cols-1) * m_pixelSize / 2;
+        Float yMax = (m_rows-1) * m_pixelSize / 2;
+        Float zMin = std::numeric_limits<Float>::infinity(), zMax = -zMin;
+        for (int i = 0; i < m_rows * m_cols; i++) {
+            Float v = m_data[i];
+            zMin = std::min(zMin, v);
+            zMax = std::max(zMax, v);
+        }
+        zMin -= Epsilon; zMax += Epsilon;
+        m_aabb = AABB(Point(-xMax, -yMax, zMin), Point(xMax, yMax, zMax));
+        // cout << m_aabb.toString() << '\n';
+
+        // compute normals
+        m_normals = 0;
+        if (props.getBoolean("interpolateNormals", false)) {
+            m_normals = new Normal[m_rows * m_cols];
+            int index = 0;
+
+            for (int i = 0; i < m_rows; i++)
+                for (int j = 0; j < m_cols; j++) {
+                    Float dx = getIJ(i, j+1) - getIJ(i, j-1);
+                    Float dy = getIJ(i-1, j) - getIJ(i+1, j); // reversal between i and y
+                    Normal n(-dx / m_pixelSize, -dy / m_pixelSize, 2);
+                    m_normals[index++] = normalize(n);
+                }
+        }
+	}
+
+	Heightfield(Stream *stream, InstanceManager *manager)
+			: Shape(stream, manager) {
+		Assert(false);
+	}
+
+	~Heightfield() {
+	    delete[] m_data;
+	    if (m_normals) delete[] m_normals;
+    }
+
+	void serialize(Stream *stream, InstanceManager *manager) const {
+		Shape::serialize(stream, manager);
+		Assert(false);
+	}
+
+	AABB getAABB() const {
+		return m_aabb;
+	}
+
+	Float getSurfaceArea() const {
+		return 0;
+	}
+
+	/// find smallest t >= 0 such that a*t + b is integer
+	inline static Float nextInt(Float a, Float b) {
+		if (a == 0) return std::numeric_limits<Float>::infinity();
+		else if (a > 0) return (std::ceil(b) - b) / a;
+		else return (std::floor(b) - b) / a;
+	}
+
+    /// clip ray against bounding box
+	bool clipRay(const Ray& ray, Float& mint, Float& maxt) const {
+	    Float tmp1, tmp2;
+	    if (!m_aabb.rayIntersect(ray, tmp1, tmp2)) return false;
+        mint = std::max(mint, tmp1);
+        maxt = std::min(maxt, tmp2);
+        if (maxt <= mint) return false;
+        return true;
+	}
+
+	/// transform from world to grid space
+	Point world2grid(Point p) const {
+        p.x /= m_pixelSize;
+        p.y /= m_pixelSize;
+        p.x += (m_cols - 1) / 2.0f;
+        p.y += (m_rows - 1) / 2.0f;
+        return p;
+	}
+
+    /// Subtract ray, fit quadratic function and solve.
+    /// x0, xMid and x1 assumed to be 0, 0.5 and 1.
+    /// Return -1 if no solution in [0,1].
+    inline Float fitAndSolve(Float f0, Float fMid, Float f1, Float z0, Float z1) const {
+        // subtract the "z" line of the ray
+        f0 -= z0; f1 -= z1; fMid -= (z0 + z1) / 2;
+
+        // fit quadratic function to values
+        Float a = 2*f0 - 4*fMid + 2*f1;
+        Float b = -3*f0 + 4*fMid - f1;
+        Float c = f0;
+
+        // solve and return the smaller root within [0,1] (if any)
+        Float t0, t1;
+        if (!solveQuadratic(a, b, c, t0, t1)) return -1;
+        if (t0 >= 0 && t0 <= 1) return t0;
+        if (t1 >= 0 && t1 <= 1) return t1;
+        return -1;
+    }
+
+	/// find nearest intersection of ray with bilinear interpolant
+	Float intersectPatch(Float x0, Float y0, Float z0,
+	    Float x1, Float y1, Float z1) const {
+        Float f0 = getBilinear(x0, y0);
+        Float fMid = getBilinear((x0 + x1) / 2, (y0 + y1) / 2);
+        Float f1 = getBilinear(x1, y1);
+        return fitAndSolve(f0, fMid, f1, z0, z1);
+	}
+
+	bool rayIntersect(const Ray &ray, Float mint, Float maxt, Float &tResult, void *tmp) const {
+        if (!clipRay(ray, mint, maxt)) return false;
+
+	    // ray segment endpoints in grid space
+        Point start = world2grid(ray(mint));
+        Point end = world2grid(ray(maxt));
+
+        // the 2D vector from start to end of ray
+        Vector d = end - start;
+        if (d.lengthSquared() == 0) return false;
+
+        // handle trivial case of perpendicular ray
+        if (d.x == 0 && d.y == 0) {
+            Assert(d.z != 0);
+            Float z = getBilinear(start.x, start.y);
+
+            if ((z - start.z) * (z - end.z) > 0) return false;
+
+            Float t = (z - start.z) / (end.z - start.z);
+            tResult = (1-t) * mint + t * maxt;
+            return true;
+        }
+
+        // the 2D ray parameter and its deltas (can be infinite)
+        Float t = 0;
+        Float tMax = std::sqrt(d.x*d.x + d.y*d.y);
+        Float tDeltaX = std::abs(tMax / d.x);
+        Float tDeltaY = std::abs(tMax / d.y);
+        d /= tMax;
+        Float tMaxX = nextInt(d.x, start.x);
+        Float tMaxY = nextInt(d.y, start.y);
+        Float tNext;
+
+        while (t < tMax) {
+            if (tMaxX < tMaxY) {
+                // advance in x
+                tNext = std::min(tMaxX, tMax);
+                tMaxX += tDeltaX;
+            } else {
+                // advance in y
+                tNext = std::min(tMaxY, tMax);
+                tMaxY += tDeltaY;
+            }
+
+            // get intersection point (shifted into [0,1])
+            Point p = start + t * d;
+            Point q = start + tNext * d;
+            Float tPatch = intersectPatch(p.x, p.y, p.z, q.x, q.y, q.z);
+
+            if (tPatch >= 0) {
+                // found intersection, shift back from [0,1]
+                tPatch = t + tPatch * (tNext - t);
+                tResult = mint + tPatch * (maxt - mint) / tMax;
+                return true;
+            }
+
+            t = tNext;
+        }
+
+		return false;
+	}
+
+	bool rayIntersect(const Ray &r, Float mint, Float maxt) const {
+	    Float t;
+		return rayIntersect(r, mint, maxt, t, 0);
+	}
+
+	void fillIntersectionRecord(const Ray &ray,
+			const void *temp, Intersection &its) const {
+        Float t = its.t;
+        memset(&its, 0, sizeof(Intersection));
+
+        // position
+        its.t = t;
+		its.p = ray(t);
+
+		// normals
+		Point pGrid = world2grid(its.p);
+        Vector2 grad = getBilinearGradient(pGrid.x, pGrid.y);
+		Normal gn = normalize(Vector(-grad.x, -grad.y, m_pixelSize));
+		Normal sn = m_normals ? getInterpolatedNormal(pGrid.x, pGrid.y) : gn;
+
+		// primitive uv-s
+        its.uv = Point2(pGrid.x, pGrid.y);
+        its.dpdu = Vector(m_pixelSize, 0, grad.x);
+        its.dpdv = Vector(0, m_pixelSize, grad.y);
+
+		if (m_flipNormal) { gn *= -1; sn *= -1; }
+    	its.geoFrame = Frame(gn);
+    	its.shFrame = Frame(sn);
+
+ 		its.wi = its.toLocal(-ray.d);
+		its.shape = this;
+	}
+
+	void getNormalDerivative(const Intersection &its,
+        Vector &dndu, Vector &dndv, bool shadingFrame) const {
+    	if (!m_normals) {
+    		dndu = Vector(0.0f);
+    		dndv = Vector(0.0f);
+            return;
+        }
+
+        Float x = its.uv.x, y = its.uv.y;
+        int xi = (int) std::floor(x);
+        int yi = (int) std::floor(y);
+        Float u = x - xi, v = y - yi;
+        Normal n00 = getNormalXY(xi, yi);
+        Normal n01 = getNormalXY(xi, yi+1);
+        Normal n10 = getNormalXY(xi+1, yi);
+        Normal n11 = getNormalXY(xi+1, yi+1);
+
+    	Normal N((1-u)*(1-v)*n00 + (1-u)*v*n01 +  u*(1-v)*n10 +  u*v*n11);
+    	Float il = 1.0f / N.length(); N *= il;
+
+        dndu = (1-v) * (n10 - n00) + v * (n11 - n01);
+    	dndu *= il; dndu -= N * dot(N, dndu);
+        dndv = (1-u) * (n01 - n00) + u * (n11 - n10);
+    	dndv *= il; dndv -= N * dot(N, dndv);
+    }
+
+	size_t getPrimitiveCount() const {
+		return 1;
+	}
+
+	size_t getEffectivePrimitiveCount() const {
+		return 1;
+	}
+
+	std::string toString() const {
+		std::ostringstream oss;
+		oss << "Heightfield[" << endl
+			<< "  pixelSize = " << m_pixelSize << ", " << endl
+			<< "  rows = " << m_rows << ", " << endl
+			<< "  cols = " << m_cols << ", " << endl
+			<< "]";
+		return oss.str();
+	}
+
+	MTS_DECLARE_CLASS()
+private:
+    AABB m_aabb;
+    Float m_pixelSize;
+    int m_rows, m_cols;
+    float* m_data;
+    Normal* m_normals;
+    bool m_flipNormal;
+};
+
+MTS_IMPLEMENT_CLASS_S(Heightfield, false, Shape)
+MTS_EXPORT_PLUGIN(Heightfield, "Height field intersection primitive");
+MTS_NAMESPACE_END
+

From 9daefeb800f489c24679e019c0cc86d4c6d227b5 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 4 Jan 2013 09:04:24 -0500
Subject: [PATCH 02/43] started to rewrite the heightfield code from scratch

---
 src/shapes/heightfield.cpp | 371 -------------------------------------
 1 file changed, 371 deletions(-)
 delete mode 100644 src/shapes/heightfield.cpp

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
deleted file mode 100644
index 5d91aa92..00000000
--- a/src/shapes/heightfield.cpp
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
-    This file is part of Mitsuba, a physically based rendering system.
-
-    Copyright (c) 2007-2011 by Wenzel Jakob and others.
-
-    Mitsuba is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License Version 3
-    as published by the Free Software Foundation.
-
-    Mitsuba is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <mitsuba/render/shape.h>
-#include <mitsuba/render/bsdf.h>
-#include <mitsuba/render/trimesh.h>
-#include <mitsuba/core/properties.h>
-#include <mitsuba/core/bitmap.h>
-#include <mitsuba/core/fstream.h>
-
-MTS_NAMESPACE_BEGIN
-
-/*!\plugin{heightfield}{Height field}
- * \order{12}
- *
- * Developed by Milo\^{s} Ha\^{s}an
- */
-class Heightfield : public Shape {
-private:
-    inline Float getIJ(int i, int j) const {
-        i = std::max(0, std::min(m_rows-1, i));
-        j = std::max(0, std::min(m_cols-1, j));
-        return m_data[i*m_cols + j];
-    }
-
-    inline Normal getNormalIJ(int i, int j) const {
-        i = std::max(0, std::min(m_rows-1, i));
-        j = std::max(0, std::min(m_cols-1, j));
-        return m_normals[i*m_cols + j];
-    }
-
-    inline Float getXY(int x, int y) const {
-        return getIJ(m_rows - y - 1, x);
-    }
-
-    inline Normal getNormalXY(int x, int y) const {
-        return getNormalIJ(m_rows - y - 1, x);
-    }
-
-    inline Float getBilinear(Float x, Float y) const {
-        // cout << "getBilinear: " << x << ' ' << y << ' ';
-        int xi = (int) std::floor(x);
-        int yi = (int) std::floor(y);
-        Float u = x - xi, v = y - yi;
-        Float z00 = getXY(xi, yi);
-        Float z01 = getXY(xi, yi+1);
-        Float z10 = getXY(xi+1, yi);
-        Float z11 = getXY(xi+1, yi+1);
-        Float result = (1-u)*(1-v)*z00 + (1-u)*v*z01 +  u*(1-v)*z10 +  u*v*z11;
-        // cout << result << '\n';
-        return result;
-    }
-
-    inline Vector2 getBilinearGradient(Float x, Float y) const {
-        int xi = (int) std::floor(x);
-        int yi = (int) std::floor(y);
-        Float u = x - xi, v = y - yi;
-        Float z00 = getXY(xi, yi);
-        Float z01 = getXY(xi, yi+1);
-        Float z10 = getXY(xi+1, yi);
-        Float z11 = getXY(xi+1, yi+1);
-        Float du = (1-v) * (z10 - z00) + v * (z11 - z01);
-        Float dv = (1-u) * (z01 - z00) + u * (z11 - z10);
-        return Vector2(du, dv);
-    }
-
-    inline Normal getInterpolatedNormal(Float x, Float y) const {
-        int xi = (int) std::floor(x);
-        int yi = (int) std::floor(y);
-        Float u = x - xi, v = y - yi;
-        Normal z00 = getNormalXY(xi, yi);
-        Normal z01 = getNormalXY(xi, yi+1);
-        Normal z10 = getNormalXY(xi+1, yi);
-        Normal z11 = getNormalXY(xi+1, yi+1);
-        Normal result = (1-u)*(1-v)*z00 + (1-u)*v*z01 +  u*(1-v)*z10 +  u*v*z11;
-        return normalize(result);
-    }
-
-public:
-	Heightfield(const Properties &props) : Shape(props) {
-        m_pixelSize = props.getFloat("pixelSize", 0.01f);
-        Float multiplier = props.getFloat("multiplier", 1);
-        m_flipNormal = props.getBoolean("flipNormal", false);
-
-        // read bitmap; use only red channel
-        fs::path hfPath = props.getString("hf");
-		ref<FileStream> stream = new FileStream(hfPath);
-        ref<Bitmap> hf = new Bitmap(Bitmap::EOpenEXR, stream);
-        m_rows = hf->getHeight();
-        m_cols = hf->getWidth();
-        Vector4* hfData = (Vector4*) hf->getFloatData();
-
-        m_data = new float[m_rows * m_cols];
-        for (int i = 0; i < m_rows * m_cols; i++)
-            m_data[i] = hfData[i].x * multiplier;
-
-        // compute bounding box
-        Float xMax = (m_cols-1) * m_pixelSize / 2;
-        Float yMax = (m_rows-1) * m_pixelSize / 2;
-        Float zMin = std::numeric_limits<Float>::infinity(), zMax = -zMin;
-        for (int i = 0; i < m_rows * m_cols; i++) {
-            Float v = m_data[i];
-            zMin = std::min(zMin, v);
-            zMax = std::max(zMax, v);
-        }
-        zMin -= Epsilon; zMax += Epsilon;
-        m_aabb = AABB(Point(-xMax, -yMax, zMin), Point(xMax, yMax, zMax));
-        // cout << m_aabb.toString() << '\n';
-
-        // compute normals
-        m_normals = 0;
-        if (props.getBoolean("interpolateNormals", false)) {
-            m_normals = new Normal[m_rows * m_cols];
-            int index = 0;
-
-            for (int i = 0; i < m_rows; i++)
-                for (int j = 0; j < m_cols; j++) {
-                    Float dx = getIJ(i, j+1) - getIJ(i, j-1);
-                    Float dy = getIJ(i-1, j) - getIJ(i+1, j); // reversal between i and y
-                    Normal n(-dx / m_pixelSize, -dy / m_pixelSize, 2);
-                    m_normals[index++] = normalize(n);
-                }
-        }
-	}
-
-	Heightfield(Stream *stream, InstanceManager *manager)
-			: Shape(stream, manager) {
-		Assert(false);
-	}
-
-	~Heightfield() {
-	    delete[] m_data;
-	    if (m_normals) delete[] m_normals;
-    }
-
-	void serialize(Stream *stream, InstanceManager *manager) const {
-		Shape::serialize(stream, manager);
-		Assert(false);
-	}
-
-	AABB getAABB() const {
-		return m_aabb;
-	}
-
-	Float getSurfaceArea() const {
-		return 0;
-	}
-
-	/// find smallest t >= 0 such that a*t + b is integer
-	inline static Float nextInt(Float a, Float b) {
-		if (a == 0) return std::numeric_limits<Float>::infinity();
-		else if (a > 0) return (std::ceil(b) - b) / a;
-		else return (std::floor(b) - b) / a;
-	}
-
-    /// clip ray against bounding box
-	bool clipRay(const Ray& ray, Float& mint, Float& maxt) const {
-	    Float tmp1, tmp2;
-	    if (!m_aabb.rayIntersect(ray, tmp1, tmp2)) return false;
-        mint = std::max(mint, tmp1);
-        maxt = std::min(maxt, tmp2);
-        if (maxt <= mint) return false;
-        return true;
-	}
-
-	/// transform from world to grid space
-	Point world2grid(Point p) const {
-        p.x /= m_pixelSize;
-        p.y /= m_pixelSize;
-        p.x += (m_cols - 1) / 2.0f;
-        p.y += (m_rows - 1) / 2.0f;
-        return p;
-	}
-
-    /// Subtract ray, fit quadratic function and solve.
-    /// x0, xMid and x1 assumed to be 0, 0.5 and 1.
-    /// Return -1 if no solution in [0,1].
-    inline Float fitAndSolve(Float f0, Float fMid, Float f1, Float z0, Float z1) const {
-        // subtract the "z" line of the ray
-        f0 -= z0; f1 -= z1; fMid -= (z0 + z1) / 2;
-
-        // fit quadratic function to values
-        Float a = 2*f0 - 4*fMid + 2*f1;
-        Float b = -3*f0 + 4*fMid - f1;
-        Float c = f0;
-
-        // solve and return the smaller root within [0,1] (if any)
-        Float t0, t1;
-        if (!solveQuadratic(a, b, c, t0, t1)) return -1;
-        if (t0 >= 0 && t0 <= 1) return t0;
-        if (t1 >= 0 && t1 <= 1) return t1;
-        return -1;
-    }
-
-	/// find nearest intersection of ray with bilinear interpolant
-	Float intersectPatch(Float x0, Float y0, Float z0,
-	    Float x1, Float y1, Float z1) const {
-        Float f0 = getBilinear(x0, y0);
-        Float fMid = getBilinear((x0 + x1) / 2, (y0 + y1) / 2);
-        Float f1 = getBilinear(x1, y1);
-        return fitAndSolve(f0, fMid, f1, z0, z1);
-	}
-
-	bool rayIntersect(const Ray &ray, Float mint, Float maxt, Float &tResult, void *tmp) const {
-        if (!clipRay(ray, mint, maxt)) return false;
-
-	    // ray segment endpoints in grid space
-        Point start = world2grid(ray(mint));
-        Point end = world2grid(ray(maxt));
-
-        // the 2D vector from start to end of ray
-        Vector d = end - start;
-        if (d.lengthSquared() == 0) return false;
-
-        // handle trivial case of perpendicular ray
-        if (d.x == 0 && d.y == 0) {
-            Assert(d.z != 0);
-            Float z = getBilinear(start.x, start.y);
-
-            if ((z - start.z) * (z - end.z) > 0) return false;
-
-            Float t = (z - start.z) / (end.z - start.z);
-            tResult = (1-t) * mint + t * maxt;
-            return true;
-        }
-
-        // the 2D ray parameter and its deltas (can be infinite)
-        Float t = 0;
-        Float tMax = std::sqrt(d.x*d.x + d.y*d.y);
-        Float tDeltaX = std::abs(tMax / d.x);
-        Float tDeltaY = std::abs(tMax / d.y);
-        d /= tMax;
-        Float tMaxX = nextInt(d.x, start.x);
-        Float tMaxY = nextInt(d.y, start.y);
-        Float tNext;
-
-        while (t < tMax) {
-            if (tMaxX < tMaxY) {
-                // advance in x
-                tNext = std::min(tMaxX, tMax);
-                tMaxX += tDeltaX;
-            } else {
-                // advance in y
-                tNext = std::min(tMaxY, tMax);
-                tMaxY += tDeltaY;
-            }
-
-            // get intersection point (shifted into [0,1])
-            Point p = start + t * d;
-            Point q = start + tNext * d;
-            Float tPatch = intersectPatch(p.x, p.y, p.z, q.x, q.y, q.z);
-
-            if (tPatch >= 0) {
-                // found intersection, shift back from [0,1]
-                tPatch = t + tPatch * (tNext - t);
-                tResult = mint + tPatch * (maxt - mint) / tMax;
-                return true;
-            }
-
-            t = tNext;
-        }
-
-		return false;
-	}
-
-	bool rayIntersect(const Ray &r, Float mint, Float maxt) const {
-	    Float t;
-		return rayIntersect(r, mint, maxt, t, 0);
-	}
-
-	void fillIntersectionRecord(const Ray &ray,
-			const void *temp, Intersection &its) const {
-        Float t = its.t;
-        memset(&its, 0, sizeof(Intersection));
-
-        // position
-        its.t = t;
-		its.p = ray(t);
-
-		// normals
-		Point pGrid = world2grid(its.p);
-        Vector2 grad = getBilinearGradient(pGrid.x, pGrid.y);
-		Normal gn = normalize(Vector(-grad.x, -grad.y, m_pixelSize));
-		Normal sn = m_normals ? getInterpolatedNormal(pGrid.x, pGrid.y) : gn;
-
-		// primitive uv-s
-        its.uv = Point2(pGrid.x, pGrid.y);
-        its.dpdu = Vector(m_pixelSize, 0, grad.x);
-        its.dpdv = Vector(0, m_pixelSize, grad.y);
-
-		if (m_flipNormal) { gn *= -1; sn *= -1; }
-    	its.geoFrame = Frame(gn);
-    	its.shFrame = Frame(sn);
-
- 		its.wi = its.toLocal(-ray.d);
-		its.shape = this;
-	}
-
-	void getNormalDerivative(const Intersection &its,
-        Vector &dndu, Vector &dndv, bool shadingFrame) const {
-    	if (!m_normals) {
-    		dndu = Vector(0.0f);
-    		dndv = Vector(0.0f);
-            return;
-        }
-
-        Float x = its.uv.x, y = its.uv.y;
-        int xi = (int) std::floor(x);
-        int yi = (int) std::floor(y);
-        Float u = x - xi, v = y - yi;
-        Normal n00 = getNormalXY(xi, yi);
-        Normal n01 = getNormalXY(xi, yi+1);
-        Normal n10 = getNormalXY(xi+1, yi);
-        Normal n11 = getNormalXY(xi+1, yi+1);
-
-    	Normal N((1-u)*(1-v)*n00 + (1-u)*v*n01 +  u*(1-v)*n10 +  u*v*n11);
-    	Float il = 1.0f / N.length(); N *= il;
-
-        dndu = (1-v) * (n10 - n00) + v * (n11 - n01);
-    	dndu *= il; dndu -= N * dot(N, dndu);
-        dndv = (1-u) * (n01 - n00) + u * (n11 - n10);
-    	dndv *= il; dndv -= N * dot(N, dndv);
-    }
-
-	size_t getPrimitiveCount() const {
-		return 1;
-	}
-
-	size_t getEffectivePrimitiveCount() const {
-		return 1;
-	}
-
-	std::string toString() const {
-		std::ostringstream oss;
-		oss << "Heightfield[" << endl
-			<< "  pixelSize = " << m_pixelSize << ", " << endl
-			<< "  rows = " << m_rows << ", " << endl
-			<< "  cols = " << m_cols << ", " << endl
-			<< "]";
-		return oss.str();
-	}
-
-	MTS_DECLARE_CLASS()
-private:
-    AABB m_aabb;
-    Float m_pixelSize;
-    int m_rows, m_cols;
-    float* m_data;
-    Normal* m_normals;
-    bool m_flipNormal;
-};
-
-MTS_IMPLEMENT_CLASS_S(Heightfield, false, Shape)
-MTS_EXPORT_PLUGIN(Heightfield, "Height field intersection primitive");
-MTS_NAMESPACE_END
-

From 4bcd5dd408d6d9b2d61a2e2c327df3eedab1583e Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Sun, 25 Aug 2013 12:27:25 +0200
Subject: [PATCH 03/43] fix for a BRE issue reported by Jean-Dominique Gascuel

---
 src/integrators/photonmapper/bre.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/integrators/photonmapper/bre.cpp b/src/integrators/photonmapper/bre.cpp
index aa0f6e55..d92c76e1 100644
--- a/src/integrators/photonmapper/bre.cpp
+++ b/src/integrators/photonmapper/bre.cpp
@@ -169,7 +169,7 @@ Spectrum BeamRadianceEstimator::query(const Ray &r, const Medium *medium) const
 		Float diskDistance = dot(originToCenter, ray.d), radSqr = node.radius * node.radius;
 		Float distSqr = (ray(diskDistance) - node.photon.getPosition()).lengthSquared();
 
-		if (distSqr < radSqr) {
+		if (diskDistance > 0 && distSqr < radSqr) {
 			Float weight = K2(distSqr/radSqr)/radSqr;
 
 			Vector wi = -node.photon.getDirection();

From a961b49ca7026840b3714f76e0ecc5a5d1c40589 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 28 Aug 2013 17:06:07 +0200
Subject: [PATCH 04/43] make the adaptive integrator behave in a nicer way wrt.
 the stop button (Jean-Dominique Gascuel)

---
 src/integrators/misc/adaptive.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/integrators/misc/adaptive.cpp b/src/integrators/misc/adaptive.cpp
index 65bb0119..77b6f868 100644
--- a/src/integrators/misc/adaptive.cpp
+++ b/src/integrators/misc/adaptive.cpp
@@ -212,7 +212,10 @@ public:
 			Float mean = 0, meanSqr = 0.0f;
 			sampleCount = 0;
 
-			while (!stop) {
+			while (true) {
+				if (stop)
+					return;
+
 				rRec.newQuery(RadianceQueryRecord::ESensorRay, sensor->getMedium());
 				rRec.extra = RadianceQueryRecord::EAdaptiveQuery;
 

From c8d22d3a9df34a5783bed868539de5752b216ef4 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 28 Aug 2013 17:07:09 +0200
Subject: [PATCH 05/43] edge.cpp: Don't allow creating edges with length=0

---
 src/libbidir/edge.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/libbidir/edge.cpp b/src/libbidir/edge.cpp
index 6ad50fb1..4e867899 100644
--- a/src/libbidir/edge.cpp
+++ b/src/libbidir/edge.cpp
@@ -48,6 +48,9 @@ bool PathEdge::sampleNext(const Scene *scene, Sampler *sampler,
 		return false;
 	}
 
+	if (length == 0)
+		return false;
+
 	if (!medium) {
 		weight[ERadiance] = weight[EImportance] = Spectrum(1.0f);
 		pdf[ERadiance] = pdf[EImportance] = 1.0f;
@@ -103,6 +106,9 @@ bool PathEdge::perturbDirection(const Scene *scene,
 	}
 	d = ray.d;
 
+	if (length == 0)
+		return false;
+
 	if (!medium) {
 		weight[ERadiance] = weight[EImportance] = Spectrum(1.0f);
 		pdf[ERadiance] = pdf[EImportance] = 1.0f;

From c7257d765d7688b06023657e5ce2ec735764b907 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 28 Aug 2013 17:11:34 +0200
Subject: [PATCH 06/43] minor homogeneous numerics fix (reported by
 Jean-Dominique Gascuel)

---
 src/medium/homogeneous.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/medium/homogeneous.cpp b/src/medium/homogeneous.cpp
index 2fdde4ae..1376b350 100644
--- a/src/medium/homogeneous.cpp
+++ b/src/medium/homogeneous.cpp
@@ -276,7 +276,8 @@ public:
 			Sampler *sampler) const {
 		Float rand = sampler->next1D(), sampledDistance;
 		Float samplingDensity = m_samplingDensity;
-		if (rand <= m_mediumSamplingWeight) {
+
+		if (rand < m_mediumSamplingWeight) {
 			rand /= m_mediumSamplingWeight;
 			if (m_strategy != EMaximum) {
 				/* Choose the sampling density to be used */

From 326f1533ac175995b7be67d5384571aea590ec92 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 4 Sep 2013 17:31:23 +0200
Subject: [PATCH 07/43] Bitmap::scale convenience function

---
 include/mitsuba/core/bitmap.h |   5 ++
 src/libcore/bitmap.cpp        | 141 ++++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)

diff --git a/include/mitsuba/core/bitmap.h b/include/mitsuba/core/bitmap.h
index a59328dd..2bc65e16 100644
--- a/include/mitsuba/core/bitmap.h
+++ b/include/mitsuba/core/bitmap.h
@@ -725,6 +725,11 @@ public:
 	void accumulate(const Bitmap *bitmap, Point2i sourceOffset,
 			Point2i targetOffset, Vector2i size);
 
+	/**
+	 * \brief Scale the entire image by a certain value
+	 */
+	void scale(Float value);
+
 	/**
 	 * \brief Color balancing: apply the given scale factors to the
 	 * red, green, and blue channels of the image
diff --git a/src/libcore/bitmap.cpp b/src/libcore/bitmap.cpp
index 99b93ae3..dc02b7c0 100644
--- a/src/libcore/bitmap.cpp
+++ b/src/libcore/bitmap.cpp
@@ -555,6 +555,147 @@ void Bitmap::accumulate(const Bitmap *bitmap, Point2i sourceOffset,
 	}
 }
 
+void Bitmap::scale(Float value) {
+	if (m_componentFormat == EBitmask)
+		Log(EError, "Bitmap::scale(): bitmasks are not supported!");
+
+	size_t nPixels = getPixelCount(), nChannels = getChannelCount();
+
+	if (hasAlpha()) {
+		switch (m_componentFormat) {
+			case EUInt8: {
+					uint8_t *data = (uint8_t *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (uint8_t) std::min((Float) std::numeric_limits<uint8_t>::max(),
+								std::max((Float) 0, *data * value + (Float) 0.5f));
+							++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			case EUInt16: {
+					uint16_t *data = (uint16_t *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (uint16_t) std::min((Float) std::numeric_limits<uint16_t>::max(),
+								std::max((Float) 0, *data * value + (Float) 0.5f));
+							++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			case EUInt32: {
+					uint32_t *data = (uint32_t *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (uint32_t) std::min((Float) std::numeric_limits<uint32_t>::max(),
+								std::max((Float) 0, *data * value + (Float) 0.5f));
+							++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			case EFloat16: {
+					half *data = (half *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (half) (*data * value); ++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			case EFloat32: {
+					float *data = (float *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (float) (*data * value); ++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			case EFloat64: {
+					double *data = (double *) m_data;
+					for (size_t i=0; i<nPixels; ++i) {
+						for (size_t j=0; j<nChannels-1; ++j) {
+							*data = (double) (*data * value); ++data;
+						}
+						++data;
+					}
+				}
+				break;
+
+			default:
+				Log(EError, "Bitmap::scale(): unexpected data format!");
+		}
+
+	} else {
+		size_t nEntries = nPixels * nChannels;
+
+		switch (m_componentFormat) {
+			case EUInt8: {
+					uint8_t *data = (uint8_t *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (uint8_t) std::min((Float) std::numeric_limits<uint8_t>::max(),
+							std::max((Float) 0, data[i] * value + (Float) 0.5f));
+				}
+				break;
+
+			case EUInt16: {
+					uint16_t *data = (uint16_t *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (uint16_t) std::min((Float) std::numeric_limits<uint16_t>::max(),
+							std::max((Float) 0, data[i] * value + (Float) 0.5f));
+				}
+				break;
+
+			case EUInt32: {
+					uint32_t *data = (uint32_t *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (uint32_t) std::min((Float) std::numeric_limits<uint32_t>::max(),
+							std::max((Float) 0, data[i] * value + (Float) 0.5f));
+				}
+				break;
+
+			case EFloat16: {
+					half *data = (half *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (half) (data[i] * value);
+				}
+				break;
+
+			case EFloat32: {
+					float *data = (float *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (float) (data[i] * value);
+				}
+				break;
+
+			case EFloat64: {
+					double *data = (double *) m_data;
+					for (size_t i=0; i<nEntries; ++i)
+						data[i] = (double) (data[i] * value);
+				}
+				break;
+
+			default:
+				Log(EError, "Bitmap::scale(): unexpected data format!");
+		}
+	}
+}
+
+
+
 void Bitmap::colorBalance(Float r, Float g, Float b) {
 	if (m_pixelFormat != ERGB && m_pixelFormat != ERGBA)
 		Log(EError, "colorBalance(): expected a RGB or RGBA image!");

From d582d8578d98d622ec1d045abbd6c7b5e24bc350 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 5 Sep 2013 15:04:39 +0200
Subject: [PATCH 08/43] Convienience functions for easily doing arithmetic
 operations with images of arbitrary types

Also added a feature to allow creation of bitmaps over external/temporary memory, which the Bitmap
instance won't deallocate upon its destruction
---
 include/mitsuba/core/bitmap.h |  85 ++++++++++++++----
 src/libcore/bitmap.cpp        | 159 ++++++++++++++++++++++++++++++----
 2 files changed, 209 insertions(+), 35 deletions(-)

diff --git a/include/mitsuba/core/bitmap.h b/include/mitsuba/core/bitmap.h
index 2bc65e16..f37b32f0 100644
--- a/include/mitsuba/core/bitmap.h
+++ b/include/mitsuba/core/bitmap.h
@@ -270,6 +270,14 @@ public:
 		ERotate90FlipY      = ERotate270FlipX
 	};
 
+	/// The four basic arithmetic operations for use with \ref arithmeticOperation()
+	enum EArithmeticOperation {
+		EAddition = 0,
+		ESubtraction,
+		EMultiplication,
+		EDivision
+	};
+
 	/**
 	 * \brief Create a bitmap of the specified type and allocate
 	 * the necessary amount of memory
@@ -287,9 +295,13 @@ public:
 	 * \param channelCount
 	 *    Channel count of the image. This parameter is only required when
 	 *    \c pFmt = \ref EMultiChannel
+	 *
+	 * \param data
+	 *    External pointer to the image data. If set to \c NULL, the
+	 *    implementation will allocate memory itself.
 	 */
 	Bitmap(EPixelFormat pFmt, EComponentFormat cFmt, const Vector2i &size,
-		int channelCount = -1);
+		uint8_t channelCount = 0, uint8_t *data = NULL);
 
 	/**
 	 * \brief Load a bitmap from an arbitrary stream data source
@@ -335,7 +347,7 @@ public:
 	inline int getHeight() const { return m_size.y; }
 
 	/// Return the number of channels used by this bitmap
-	inline int getChannelCount() const { return m_channelCount; }
+	inline int getChannelCount() const { return (int) m_channelCount; }
 
 	/// Return whether this image has matching width and height
 	inline bool isSquare() const { return m_size.x == m_size.y; }
@@ -711,22 +723,12 @@ public:
 	/// Perform the specified rotatation & flip operation
 	ref<Bitmap> rotateFlip(ERotateFlipType type) const;
 
-	/**
-	 * \brief Accumulate the contents of another bitmap into the
-	 * region of the specified offset
-	 *
-	 * Out-of-bounds regions are ignored. It is assumed that
-	 * <tt>bitmap != this</tt>.
-	 *
-	 * \remark This function throws an exception when the bitmaps
-	 * use different component formats or channels, or when the
-	 * component format is \ref EBitmask.
-	 */
-	void accumulate(const Bitmap *bitmap, Point2i sourceOffset,
-			Point2i targetOffset, Vector2i size);
-
 	/**
 	 * \brief Scale the entire image by a certain value
+	 *
+	 * Skips the image's alpha channel, if it has one. When the image uses
+	 * a fixed point representation and a pixel value overflows during the
+	 * scale operation, it is clamped to the representable range.
 	 */
 	void scale(Float value);
 
@@ -748,6 +750,19 @@ public:
 	 */
 	void applyMatrix(Float matrix[3][3]);
 
+	/**
+	 * \brief Accumulate the contents of another bitmap into the
+	 * region of the specified offset
+	 *
+	 * Out-of-bounds regions are ignored. It is assumed that
+	 * <tt>bitmap != this</tt>.
+	 *
+	 * \remark This function throws an exception when the bitmaps
+	 * use different component formats or channels, or when the
+	 * component format is \ref EBitmask.
+	 */
+	void accumulate(const Bitmap *bitmap, Point2i sourceOffset,
+			Point2i targetOffset, Vector2i size);
 	/**
 	 * \brief Accumulate the contents of another bitmap into the
 	 * region of the specified offset
@@ -765,6 +780,41 @@ public:
 		accumulate(bitmap, Point2i(0), targetOffset, bitmap->getSize());
 	}
 
+	/**
+	 * \brief Accumulate the contents of another bitmap into the
+	 * region of the specified offset
+	 *
+	 * This convenience function calls the main <tt>accumulate()</tt>
+	 * implementation with <tt>size</tt> set to <tt>bitmap->getSize()</tt>
+	 * and <tt>sourceOffset</tt> and <tt>targetOffset</tt>tt> set to zero.
+	 * Out-of-bounds regions are ignored. It is assumed
+	 * that <tt>bitmap != this</tt>.
+	 *
+	 * \remark This function throws an exception when the bitmaps
+	 * use different component formats or channels, or when the
+	 * component format is \ref EBitmask.
+	 */
+	inline void accumulate(const Bitmap *bitmap) {
+		accumulate(bitmap, Point2i(0), Point2i(0), bitmap->getSize());
+	}
+
+	/**
+	 * \brief Perform an arithmetic operation using two images
+	 *
+	 * This function can add, subtract, multiply, or divide arbitrary
+	 * images. If the input images have different sizes or component
+	 * and pixel formats, the implementation first resamples and
+	 * converts them into the most "expressive" format that subsumes
+	 * both input images (at the cost of some temporary dynamic
+	 * memory allocations).
+	 *
+	 * To keep the implementation simple, there is currently no
+	 * special treatment of integer over/underflows if the component
+	 * format is \ref EUInt8, \ref EUInt16, or \ref EUInt32.
+	 */
+	static ref<Bitmap> arithmeticOperation(EArithmeticOperation operation,
+			const Bitmap *bitmap1, const Bitmap *bitmap2);
+
 	/**
 	 * \brief Up- or down-sample this image to a different resolution
 	 *
@@ -946,7 +996,8 @@ protected:
 	Vector2i m_size;
 	uint8_t *m_data;
 	Float m_gamma;
-	int m_channelCount;
+	uint8_t m_channelCount;
+	bool m_ownsData;
 	Properties m_metadata;
 };
 
diff --git a/src/libcore/bitmap.cpp b/src/libcore/bitmap.cpp
index dc02b7c0..6e5606d1 100644
--- a/src/libcore/bitmap.cpp
+++ b/src/libcore/bitmap.cpp
@@ -251,8 +251,8 @@ extern "C" {
  * ========================== */
 
 Bitmap::Bitmap(EPixelFormat pFormat, EComponentFormat cFormat,
-		const Vector2i &size, int channelCount) : m_pixelFormat(pFormat),
-		m_componentFormat(cFormat), m_size(size), m_channelCount(channelCount) {
+		const Vector2i &size, uint8_t channelCount, uint8_t *data) : m_pixelFormat(pFormat),
+		m_componentFormat(cFormat), m_size(size), m_data(data), m_channelCount(channelCount), m_ownsData(false) {
 	AssertEx(size.x > 0 && size.y > 0, "Invalid bitmap size");
 
 	if (m_componentFormat == EUInt8)
@@ -262,10 +262,13 @@ Bitmap::Bitmap(EPixelFormat pFormat, EComponentFormat cFormat,
 
 	updateChannelCount();
 
-	m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+	if (!m_data) {
+		m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+		m_ownsData = true;
+	}
 }
 
-Bitmap::Bitmap(EFileFormat format, Stream *stream, const std::string &prefix) : m_data(NULL) {
+Bitmap::Bitmap(EFileFormat format, Stream *stream, const std::string &prefix) : m_data(NULL), m_ownsData(false) {
 	if (format == EAuto) {
 		/* Try to automatically detect the file format */
 		size_t pos = stream->getPos();
@@ -336,7 +339,7 @@ void Bitmap::write(EFileFormat format, Stream *stream, int compression,
 }
 
 size_t Bitmap::getBufferSize() const {
-	size_t bitsPerRow = m_size.x * m_channelCount * getBitsPerComponent();
+	size_t bitsPerRow = (size_t) m_size.x * m_channelCount * getBitsPerComponent();
 	size_t bytesPerRow = (bitsPerRow + 7) / 8; // round up to full bytes
 	return bytesPerRow * (size_t) m_size.y;
 }
@@ -392,7 +395,7 @@ int Bitmap::getBytesPerComponent() const {
 }
 
 Bitmap::~Bitmap() {
-	if (m_data)
+	if (m_data && m_ownsData)
 		freeAligned(m_data);
 }
 
@@ -502,7 +505,7 @@ void Bitmap::accumulate(const Bitmap *bitmap, Point2i sourceOffset,
 		return;
 
 	const size_t
-		columns      = size.x * m_channelCount,
+		columns      = (size_t) size.x * m_channelCount,
 		pixelStride  = getBytesPerPixel(),
 		sourceStride = bitmap->getWidth() * pixelStride,
 		targetStride = getWidth() * pixelStride;
@@ -567,7 +570,7 @@ void Bitmap::scale(Float value) {
 					uint8_t *data = (uint8_t *) m_data;
 					for (size_t i=0; i<nPixels; ++i) {
 						for (size_t j=0; j<nChannels-1; ++j) {
-							*data = (uint8_t) std::min((Float) std::numeric_limits<uint8_t>::max(),
+							*data = (uint8_t) std::min((Float) 0xFF,
 								std::max((Float) 0, *data * value + (Float) 0.5f));
 							++data;
 						}
@@ -580,7 +583,7 @@ void Bitmap::scale(Float value) {
 					uint16_t *data = (uint16_t *) m_data;
 					for (size_t i=0; i<nPixels; ++i) {
 						for (size_t j=0; j<nChannels-1; ++j) {
-							*data = (uint16_t) std::min((Float) std::numeric_limits<uint16_t>::max(),
+							*data = (uint16_t) std::min((Float) 0xFFFF,
 								std::max((Float) 0, *data * value + (Float) 0.5f));
 							++data;
 						}
@@ -593,7 +596,7 @@ void Bitmap::scale(Float value) {
 					uint32_t *data = (uint32_t *) m_data;
 					for (size_t i=0; i<nPixels; ++i) {
 						for (size_t j=0; j<nChannels-1; ++j) {
-							*data = (uint32_t) std::min((Float) std::numeric_limits<uint32_t>::max(),
+							*data = (uint32_t) std::min((Float) 0xFFFFFFFFUL,
 								std::max((Float) 0, *data * value + (Float) 0.5f));
 							++data;
 						}
@@ -646,7 +649,7 @@ void Bitmap::scale(Float value) {
 			case EUInt8: {
 					uint8_t *data = (uint8_t *) m_data;
 					for (size_t i=0; i<nEntries; ++i)
-						data[i] = (uint8_t) std::min((Float) std::numeric_limits<uint8_t>::max(),
+						data[i] = (uint8_t) std::min((Float) 0xFF,
 							std::max((Float) 0, data[i] * value + (Float) 0.5f));
 				}
 				break;
@@ -654,7 +657,7 @@ void Bitmap::scale(Float value) {
 			case EUInt16: {
 					uint16_t *data = (uint16_t *) m_data;
 					for (size_t i=0; i<nEntries; ++i)
-						data[i] = (uint16_t) std::min((Float) std::numeric_limits<uint16_t>::max(),
+						data[i] = (uint16_t) std::min((Float) 0xFFFF,
 							std::max((Float) 0, data[i] * value + (Float) 0.5f));
 				}
 				break;
@@ -662,7 +665,7 @@ void Bitmap::scale(Float value) {
 			case EUInt32: {
 					uint32_t *data = (uint32_t *) m_data;
 					for (size_t i=0; i<nEntries; ++i)
-						data[i] = (uint32_t) std::min((Float) std::numeric_limits<uint32_t>::max(),
+						data[i] = (uint32_t) std::min((Float) 0xFFFFFFFFUL,
 							std::max((Float) 0, data[i] * value + (Float) 0.5f));
 				}
 				break;
@@ -694,6 +697,109 @@ void Bitmap::scale(Float value) {
 	}
 }
 
+ref<Bitmap> Bitmap::arithmeticOperation(Bitmap::EArithmeticOperation operation, const Bitmap *_bitmap1, const Bitmap *_bitmap2) {
+	ref<const Bitmap> bitmap1(_bitmap1), bitmap2(_bitmap2);
+
+	/* Determine the 'fancier' pixel / component format by a maximum operation on the enum values */
+	EPixelFormat pxFmt = (EPixelFormat) std::max(bitmap1->getPixelFormat(), bitmap2->getPixelFormat());
+	EComponentFormat cFmt = (EComponentFormat) std::max(bitmap1->getComponentFormat(), bitmap2->getComponentFormat());
+
+	if (cFmt == EBitmask)
+		Log(EError, "Bitmap::arithmeticOperation(): bitmasks are not supported!");
+
+	/* Make sure that the images match in size (resample if necessary) */
+	Vector2i size(
+		std::max(bitmap1->getWidth(), bitmap2->getWidth()),
+		std::max(bitmap1->getHeight(), bitmap2->getHeight()));
+
+	if (bitmap1->getSize() != size) {
+		bitmap1 = bitmap1->resample(NULL,
+				ReconstructionFilter::EClamp,
+				ReconstructionFilter::EClamp, size,
+				-std::numeric_limits<Float>::infinity(),
+				std::numeric_limits<Float>::infinity());
+	}
+
+	if (bitmap2->getSize() != size) {
+		bitmap2 = bitmap2->resample(NULL,
+				ReconstructionFilter::EClamp,
+				ReconstructionFilter::EClamp, size,
+				-std::numeric_limits<Float>::infinity(),
+				std::numeric_limits<Float>::infinity());
+	}
+
+	/* Convert the image format appropriately (no-op, if the format already matches) */
+	bitmap1 = const_cast<Bitmap *>(bitmap1.get())->convert(pxFmt, cFmt);
+	bitmap2 = const_cast<Bitmap *>(bitmap2.get())->convert(pxFmt, cFmt);
+
+	ref<Bitmap> output = new Bitmap(pxFmt, cFmt, size);
+	size_t nValues = output->getPixelCount() * output->getChannelCount();
+
+	#define IMPLEMENT_OPS() \
+		switch (operation) { \
+			case EAddition:       for (size_t i=0; i<nValues; ++i) dst[i] = src1[i] + src2[i]; break; \
+			case ESubtraction:    for (size_t i=0; i<nValues; ++i) dst[i] = src1[i] - src2[i]; break; \
+			case EMultiplication: for (size_t i=0; i<nValues; ++i) dst[i] = src1[i] * src2[i]; break; \
+			case EDivision:       for (size_t i=0; i<nValues; ++i) dst[i] = src1[i] / src2[i]; break; \
+		}
+
+	switch (cFmt) {
+		case EUInt8: {
+				const uint8_t *src1 = bitmap1->getUInt8Data();
+				const uint8_t *src2 = bitmap2->getUInt8Data();
+				uint8_t *dst = output->getUInt8Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		case EUInt16: {
+				const uint16_t *src1 = bitmap1->getUInt16Data();
+				const uint16_t *src2 = bitmap2->getUInt16Data();
+				uint16_t *dst = output->getUInt16Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		case EUInt32: {
+				const uint32_t *src1 = bitmap1->getUInt32Data();
+				const uint32_t *src2 = bitmap2->getUInt32Data();
+				uint32_t *dst = output->getUInt32Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		case EFloat16: {
+				const half *src1 = bitmap1->getFloat16Data();
+				const half *src2 = bitmap2->getFloat16Data();
+				half *dst = output->getFloat16Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		case EFloat32: {
+				const float *src1 = bitmap1->getFloat32Data();
+				const float *src2 = bitmap2->getFloat32Data();
+				float *dst = output->getFloat32Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		case EFloat64: {
+				const double *src1 = bitmap1->getFloat64Data();
+				const double *src2 = bitmap2->getFloat64Data();
+				double *dst = output->getFloat64Data();
+				IMPLEMENT_OPS();
+			}
+			break;
+
+		default:
+			Log(EError, "Bitmap::arithmeticOperation(): unexpected data format!");
+	}
+
+	#undef IMPLEMENT_OPS
+
+	return output;
+}
 
 
 void Bitmap::colorBalance(Float r, Float g, Float b) {
@@ -1273,9 +1379,8 @@ void Bitmap::applyMatrix(Float matrix_[3][3]) {
 	}
 }
 
-
 /// Bitmap resampling utility function
-template <typename Scalar> static void resample(const ReconstructionFilter *rfilter,
+template <typename Scalar> static void resample(ref<const ReconstructionFilter> rfilter,
 	ReconstructionFilter::EBoundaryCondition bch,
 	ReconstructionFilter::EBoundaryCondition bcv,
 	const Bitmap *source, Bitmap *target, Float minValue, Float maxValue) {
@@ -1283,6 +1388,17 @@ template <typename Scalar> static void resample(const ReconstructionFilter *rfil
 
 	int channels = source->getChannelCount();
 
+	if (!rfilter) {
+		/* Resample using a 2-lobed Lanczos reconstruction filter */
+		Properties rfilterProps("lanczos");
+		rfilterProps.setInteger("lobes", 2);
+		ReconstructionFilter *instance = static_cast<ReconstructionFilter *> (
+			PluginManager::getInstance()->createObject(
+			MTS_CLASS(ReconstructionFilter), rfilterProps));
+		instance->configure();
+		rfilter = instance;
+	}
+
 	if (source->getWidth() != target->getWidth()) {
 		/* Re-sample along the X direction */
 		Resampler<Scalar> r(rfilter, bch, source->getWidth(), target->getWidth());
@@ -1490,6 +1606,7 @@ void Bitmap::readPNG(Stream *stream) {
 
 	size_t bufferSize = getBufferSize();
 	m_data = static_cast<uint8_t *>(allocAligned(bufferSize));
+	m_ownsData = true;
 	rows = new png_bytep[m_size.y];
 	size_t rowBytes = png_get_rowbytes(png_ptr, info_ptr);
 	Assert(rowBytes == getBufferSize() / m_size.y);
@@ -1646,6 +1763,7 @@ void Bitmap::readJPEG(Stream *stream) {
 		* (size_t) cinfo.output_components;
 
 	m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+	m_ownsData = true;
 
 	boost::scoped_array<uint8_t*> scanlines(new uint8_t*[m_size.y]);
 	for (int i=0; i<m_size.y; ++i)
@@ -1897,7 +2015,7 @@ void Bitmap::readOpenEXR(Stream *stream, const std::string &_prefix) {
 
 	updateChannelCount();
 	m_gamma = 1.0f;
-	Assert(m_channelCount == (int) sourceChannels.size());
+	Assert(m_channelCount == (uint8_t) sourceChannels.size());
 
 	Imf::PixelType pxType = channels[sourceChannels[0]].type;
 
@@ -1933,11 +2051,12 @@ void Bitmap::readOpenEXR(Stream *stream, const std::string &_prefix) {
 
 	/* Finally, allocate memory for it */
 	m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+	m_ownsData = true;
 	char *ptr = (char *) m_data;
 
 	ptr -= (dataWindow.min.x + dataWindow.min.y * m_size.x) * pixelStride;
 
-	ref_vector<Bitmap> resampleBuffers(m_channelCount);
+	ref_vector<Bitmap> resampleBuffers((size_t) m_channelCount);
 	ref<ReconstructionFilter> rfilter;
 
 	/* Tell OpenEXR where the image data should be put */
@@ -2310,6 +2429,7 @@ void Bitmap::readTGA(Stream *stream) {
 		   rowSize = bufferSize / height;
 
 	m_data = static_cast<uint8_t *>(allocAligned(bufferSize));
+	m_ownsData = true;
 	int channels = bpp/8;
 
 	if (!rle) {
@@ -2400,6 +2520,7 @@ void Bitmap::readBMP(Stream *stream) {
 
 	size_t bufferSize = getBufferSize();
 	m_data = static_cast<uint8_t *>(allocAligned(bufferSize));
+	m_ownsData = true;
 
 	Log(ETrace, "Loading a %ix%i BMP file", m_size.x, m_size.y);
 
@@ -2537,6 +2658,7 @@ void Bitmap::readRGBE(Stream *stream) {
 	m_channelCount = 3;
 	m_gamma = 1.0f;
 	m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+	m_ownsData = true;
 	float *data = (float *) m_data;
 
 	if (m_size.x < 8 || m_size.x > 0x7fff) {
@@ -2714,6 +2836,7 @@ void Bitmap::readPFM(Stream *stream) {
 		SLog(EError, "Could not parse scale/order information!");
 
 	m_data = static_cast<uint8_t *>(allocAligned(getBufferSize()));
+	m_ownsData = true;
 	float *data = (float *) m_data;
 
 	Stream::EByteOrder backup = stream->getByteOrder();
@@ -2767,7 +2890,7 @@ void Bitmap::writePFM(Stream *stream) const {
 			float *dest = temp;
 
 			for (int x=0; x<m_size.x; ++x) {
-				for (int j=0; j<m_channelCount-1; ++j)
+				for (uint8_t j=0; j<m_channelCount-1; ++j)
 					*dest++ = *source++;
 				source++;
 			}

From 11452f7c86713da66c184569d46c0d6c996d1757 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 5 Sep 2013 15:05:03 +0200
Subject: [PATCH 09/43] Functions to rasterize textures to bitmaps

---
 include/mitsuba/hw/basicshader.h | 10 ++++++++++
 include/mitsuba/render/texture.h | 25 +++++++++++++++++++++++--
 src/libhw/basicshader.cpp        | 31 +++++++++++++++++++++++++++++++
 src/librender/texture.cpp        | 18 +++++++++++++++++-
 src/textures/bitmap.cpp          |  2 +-
 5 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/include/mitsuba/hw/basicshader.h b/include/mitsuba/hw/basicshader.h
index d9ef441a..7743f722 100644
--- a/include/mitsuba/hw/basicshader.h
+++ b/include/mitsuba/hw/basicshader.h
@@ -75,6 +75,8 @@ public:
 
 	Shader *createShader(Renderer *renderer) const;
 
+	ref<Bitmap> getBitmap(const Vector2i &resolutionHint) const;
+
 	void serialize(Stream *stream, InstanceManager *manager) const;
 
 	MTS_DECLARE_CLASS()
@@ -128,6 +130,8 @@ public:
 
 	Shader *createShader(Renderer *renderer) const;
 
+	ref<Bitmap> getBitmap(const Vector2i &resolutionHint) const;
+
 	void serialize(Stream *stream, InstanceManager *manager) const;
 
 	MTS_DECLARE_CLASS()
@@ -185,6 +189,8 @@ public:
 
 	Shader *createShader(Renderer *renderer) const;
 
+	ref<Bitmap> getBitmap(const Vector2i &resolutionHint) const;
+
 	void serialize(Stream *stream, InstanceManager *manager) const;
 
 	MTS_DECLARE_CLASS()
@@ -242,6 +248,8 @@ public:
 
 	Shader *createShader(Renderer *renderer) const;
 
+	ref<Bitmap> getBitmap(const Vector2i &resolutionHint) const;
+
 	void serialize(Stream *stream, InstanceManager *manager) const;
 
 	MTS_DECLARE_CLASS()
@@ -302,6 +310,8 @@ public:
 
 	void serialize(Stream *stream, InstanceManager *manager) const;
 
+	ref<Bitmap> getBitmap(const Vector2i &resolutionHint) const;
+
 	MTS_DECLARE_CLASS()
 protected:
 	ref<const Texture> m_a, m_b;
diff --git a/include/mitsuba/render/texture.h b/include/mitsuba/render/texture.h
index 05108c39..14007228 100644
--- a/include/mitsuba/render/texture.h
+++ b/include/mitsuba/render/texture.h
@@ -74,8 +74,17 @@ public:
 	/// Serialize to a binary data stream
 	virtual void serialize(Stream *stream, InstanceManager *manager) const;
 
-	/// Return the underlying bitmap representation (if any)
-	virtual ref<Bitmap> getBitmap() const;
+	/**
+	 * \brief Return a bitmap representation of the texture
+	 *
+	 * When the class implementing this interface is a bitmap-backed texture,
+	 * this function directly returns the underlying bitmap. When it is procedural,
+	 * a bitmap version must first be generated. In this case, the parameter
+	 * \ref resolutionHint is used to control the target resolution. The default
+	 * value <tt>-1, -1</tt> allows the implementation to choose a suitable
+	 * resolution by itself.
+	 */
+	virtual ref<Bitmap> getBitmap(const Vector2i &resolutionHint = Vector2i(-1, -1)) const;
 
 	MTS_DECLARE_CLASS()
 protected:
@@ -109,6 +118,18 @@ public:
 	virtual Spectrum eval(const Point2 &uv, const Vector2 &d0,
 			const Vector2 &d1) const = 0;
 
+	/**
+	 * \brief Return a bitmap representation of the texture
+	 *
+	 * When the class implementing this interface is a bitmap-backed texture,
+	 * this function directly returns the underlying bitmap. When it is procedural,
+	 * a bitmap version must first be generated. In this case, the parameter
+	 * \ref resolutionHint is used to control the target resolution. The default
+	 * value <tt>-1, -1</tt> allows the implementation to choose a suitable
+	 * resolution by itself.
+	 */
+	virtual ref<Bitmap> getBitmap(const Vector2i &resolutionHint = Vector2i(-1, -1)) const;
+
 	MTS_DECLARE_CLASS()
 protected:
 	Texture2D(const Properties &props);
diff --git a/src/libhw/basicshader.cpp b/src/libhw/basicshader.cpp
index f88d70d1..8aa8b86d 100644
--- a/src/libhw/basicshader.cpp
+++ b/src/libhw/basicshader.cpp
@@ -20,6 +20,7 @@
 
 MTS_NAMESPACE_BEGIN
 
+
 ConstantSpectrumTexture::ConstantSpectrumTexture(Stream *stream, InstanceManager *manager)
  : Texture(stream, manager) {
 	m_value = Spectrum(stream);
@@ -31,6 +32,12 @@ void ConstantSpectrumTexture::serialize(Stream *stream, InstanceManager *manager
 	m_value.serialize(stream);
 }
 
+ref<Bitmap> ConstantSpectrumTexture::getBitmap(const Vector2i &resolutionHint) const {
+	ref<Bitmap> result = new Bitmap(Bitmap::ESpectrum, Bitmap::EFloat, Vector2i(1, 1));
+	*((Spectrum *) result->getFloatData()) = m_value;
+	return result;
+}
+
 ConstantFloatTexture::ConstantFloatTexture(Stream *stream, InstanceManager *manager)
  : Texture(stream, manager) {
 	m_value = stream->readFloat();
@@ -41,6 +48,12 @@ void ConstantFloatTexture::serialize(Stream *stream, InstanceManager *manager) c
 	stream->writeFloat(m_value);
 }
 
+ref<Bitmap> ConstantFloatTexture::getBitmap(const Vector2i &resolutionHint) const {
+	ref<Bitmap> result = new Bitmap(Bitmap::ELuminance, Bitmap::EFloat, Vector2i(1, 1));
+	*result->getFloatData() = m_value;
+	return result;
+}
+
 SpectrumProductTexture::SpectrumProductTexture(Stream *stream, InstanceManager *manager)
  : Texture(stream, manager) {
 	m_a = static_cast<Texture *>(manager->getInstance(stream));
@@ -53,6 +66,12 @@ void SpectrumProductTexture::serialize(Stream *stream, InstanceManager *manager)
 	manager->serialize(stream, m_b.get());
 }
 
+ref<Bitmap> SpectrumProductTexture::getBitmap(const Vector2i &resolutionHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+	return Bitmap::arithmeticOperation(Bitmap::EMultiplication, bitmap1.get(), bitmap2.get());
+}
+
 SpectrumAdditionTexture::SpectrumAdditionTexture(Stream *stream, InstanceManager *manager)
  : Texture(stream, manager) {
 	m_a = static_cast<Texture *>(manager->getInstance(stream));
@@ -65,6 +84,12 @@ void SpectrumAdditionTexture::serialize(Stream *stream, InstanceManager *manager
 	manager->serialize(stream, m_b.get());
 }
 
+ref<Bitmap> SpectrumAdditionTexture::getBitmap(const Vector2i &resolutionHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+	return Bitmap::arithmeticOperation(Bitmap::EAddition, bitmap1.get(), bitmap2.get());
+}
+
 SpectrumSubtractionTexture::SpectrumSubtractionTexture(Stream *stream, InstanceManager *manager)
  : Texture(stream, manager) {
 	m_a = static_cast<Texture *>(manager->getInstance(stream));
@@ -77,6 +102,12 @@ void SpectrumSubtractionTexture::serialize(Stream *stream, InstanceManager *mana
 	manager->serialize(stream, m_b.get());
 }
 
+ref<Bitmap> SpectrumSubtractionTexture::getBitmap(const Vector2i &resolutionHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+	return Bitmap::arithmeticOperation(Bitmap::ESubtraction, bitmap1.get(), bitmap2.get());
+}
+
 class ConstantSpectrumTextureShader : public Shader {
 public:
 	ConstantSpectrumTextureShader(Renderer *renderer, const Spectrum &value)
diff --git a/src/librender/texture.cpp b/src/librender/texture.cpp
index 76221c71..2d99fcbd 100644
--- a/src/librender/texture.cpp
+++ b/src/librender/texture.cpp
@@ -47,7 +47,7 @@ Spectrum Texture::getMinimum() const { NotImplementedError("getMinimum"); }
 Spectrum Texture::getMaximum() const { NotImplementedError("getMaximum"); }
 bool Texture::isConstant() const { NotImplementedError("isConstant"); }
 bool Texture::usesRayDifferentials() const { NotImplementedError("usesRayDifferentials"); }
-ref<Bitmap> Texture::getBitmap() const { return NULL; }
+ref<Bitmap> Texture::getBitmap(const Vector2i &) const { NotImplementedError("getBitmap"); }
 
 ref<Texture> Texture::expand() {
 	return this;
@@ -100,6 +100,22 @@ Spectrum Texture2D::eval(const Intersection &its, bool filter) const {
 		return eval(uv);
 	}
 }
+
+ref<Bitmap> Texture2D::getBitmap(const Vector2i &resolutionHint) const {
+	Vector2i res(resolutionHint);
+	if (res.x <= 0 || res.y <= 0)
+		res = Vector2i(32);
+
+	Float invX = 1.0f / res.x, invY = 1.0f / res.y;
+
+	ref<Bitmap> bitmap = new Bitmap(Bitmap::ESpectrum, Bitmap::EFloat, res);
+	Spectrum *target = (Spectrum *) bitmap->getFloatData();
+	for (int y=0; y<res.y; ++y)
+		for (int x=0; x<res.x; ++x)
+			*target++ = eval(Point2((x + 0.5f) * invX, (y + 0.5f) * invY));
+	return bitmap;
+}
+
 MTS_IMPLEMENT_CLASS(Texture, true, ConfigurableObject)
 MTS_IMPLEMENT_CLASS(Texture2D, true, Texture)
 MTS_NAMESPACE_END
diff --git a/src/textures/bitmap.cpp b/src/textures/bitmap.cpp
index 301d95f7..da5b45b1 100644
--- a/src/textures/bitmap.cpp
+++ b/src/textures/bitmap.cpp
@@ -428,7 +428,7 @@ public:
 		return result;
 	}
 
-	ref<Bitmap> getBitmap() const {
+	ref<Bitmap> getBitmap(const Vector2i &/* unused */) {
 		return m_mipmap1.get() ? m_mipmap1->toBitmap() : m_mipmap3->toBitmap();
 	}
 

From 70ad3fbd6205459a947a7fe9d048b30a591adc00 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 6 Sep 2013 10:56:36 +0200
Subject: [PATCH 10/43] add an extra space in memString()

---
 src/libcore/util.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libcore/util.cpp b/src/libcore/util.cpp
index 199f5eb5..6a90128b 100644
--- a/src/libcore/util.cpp
+++ b/src/libcore/util.cpp
@@ -829,7 +829,7 @@ std::string memString(size_t size, bool precise) {
 
 	std::ostringstream os;
 	os << std::setprecision(suffix == 0 ? 0 : (precise ? 4 : 1))
-	   << std::fixed << value << suffixes[suffix];
+	   << std::fixed << value << " " << suffixes[suffix];
 
 	return os.str();
 }

From 517a30c369d8e1b696ea2b951669b5f9664520c4 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 6 Sep 2013 18:00:09 +0200
Subject: [PATCH 11/43] height field intersection method by Ramsey et al., will
 revert to something simpler

---
 data/schema/scene.xsd            |   1 +
 include/mitsuba/core/fwd.h       |   1 +
 include/mitsuba/core/point.h     |   3 +
 include/mitsuba/core/vector.h    |   3 +
 include/mitsuba/render/skdtree.h |   2 +-
 include/mitsuba/render/texture.h |  12 +-
 src/libhw/basicshader.cpp        |  22 +-
 src/librender/texture.cpp        |   4 +-
 src/shapes/heightfield.cpp       | 399 +++++++++++++++++++++++++++++++
 src/textures/bitmap.cpp          |   2 +-
 src/textures/scale.cpp           |  19 +-
 11 files changed, 446 insertions(+), 22 deletions(-)
 create mode 100644 src/shapes/heightfield.cpp

diff --git a/data/schema/scene.xsd b/data/schema/scene.xsd
index 18de916b..6ba8314d 100644
--- a/data/schema/scene.xsd
+++ b/data/schema/scene.xsd
@@ -137,6 +137,7 @@
 					<xsd:element name="emitter" type="emitter"/>
 					<xsd:element name="shape" type="shape"/>
 					<xsd:element name="medium" type="medium"/>
+					<xsd:element name="texture" type="texture"/>
 				</xsd:choice>
 			</xsd:extension>
 		</xsd:complexContent>
diff --git a/include/mitsuba/core/fwd.h b/include/mitsuba/core/fwd.h
index 91ebc7ec..8e7b8d16 100644
--- a/include/mitsuba/core/fwd.h
+++ b/include/mitsuba/core/fwd.h
@@ -149,6 +149,7 @@ typedef TVector2<size_t>      Size2;
 typedef TVector3<size_t>      Size3;
 typedef TVector4<size_t>      Size4;
 typedef TAABB<Point1>         AABB1;
+typedef AABB1                 Interval;
 typedef TAABB<Point2>         AABB2;
 typedef TAABB<Point4>         AABB4;
 /// \ingroup libpython
diff --git a/include/mitsuba/core/point.h b/include/mitsuba/core/point.h
index 459fe425..483eb810 100644
--- a/include/mitsuba/core/point.h
+++ b/include/mitsuba/core/point.h
@@ -173,6 +173,9 @@ template <typename T> struct TPoint1 {
 		stream->writeElement<T>(x);
 	}
 
+	/// Implicit conversion to Scalar
+	operator Scalar() const { return x; }
+
 	/// Return a readable string representation of this point
 	std::string toString() const {
 		std::ostringstream oss;
diff --git a/include/mitsuba/core/vector.h b/include/mitsuba/core/vector.h
index 2b59c012..4276f32b 100644
--- a/include/mitsuba/core/vector.h
+++ b/include/mitsuba/core/vector.h
@@ -165,6 +165,9 @@ template <typename T> struct TVector1 {
 		stream->writeElement<T>(x);
 	}
 
+	/// Implicit conversion to Scalar
+	operator Scalar() const { return x; }
+
 	/// Return a readable string representation of this vector
 	std::string toString() const {
 		std::ostringstream oss;
diff --git a/include/mitsuba/render/skdtree.h b/include/mitsuba/render/skdtree.h
index e6f25f9d..de3fd8f1 100644
--- a/include/mitsuba/render/skdtree.h
+++ b/include/mitsuba/render/skdtree.h
@@ -31,7 +31,7 @@
 #endif
 
 #if defined(SINGLE_PRECISION)
-/// 32 byte temporary storage for intersection computations
+/// 64 byte temporary storage for intersection computations
 #define MTS_KD_INTERSECTION_TEMP 64
 #else
 #define MTS_KD_INTERSECTION_TEMP 128
diff --git a/include/mitsuba/render/texture.h b/include/mitsuba/render/texture.h
index 14007228..6b27cd70 100644
--- a/include/mitsuba/render/texture.h
+++ b/include/mitsuba/render/texture.h
@@ -80,11 +80,11 @@ public:
 	 * When the class implementing this interface is a bitmap-backed texture,
 	 * this function directly returns the underlying bitmap. When it is procedural,
 	 * a bitmap version must first be generated. In this case, the parameter
-	 * \ref resolutionHint is used to control the target resolution. The default
+	 * \ref sizeHint is used to control the target size. The default
 	 * value <tt>-1, -1</tt> allows the implementation to choose a suitable
-	 * resolution by itself.
+	 * size by itself.
 	 */
-	virtual ref<Bitmap> getBitmap(const Vector2i &resolutionHint = Vector2i(-1, -1)) const;
+	virtual ref<Bitmap> getBitmap(const Vector2i &sizeHint = Vector2i(-1, -1)) const;
 
 	MTS_DECLARE_CLASS()
 protected:
@@ -124,11 +124,11 @@ public:
 	 * When the class implementing this interface is a bitmap-backed texture,
 	 * this function directly returns the underlying bitmap. When it is procedural,
 	 * a bitmap version must first be generated. In this case, the parameter
-	 * \ref resolutionHint is used to control the target resolution. The default
+	 * \ref sizeHint is used to control the target size. The default
 	 * value <tt>-1, -1</tt> allows the implementation to choose a suitable
-	 * resolution by itself.
+	 * size by itself.
 	 */
-	virtual ref<Bitmap> getBitmap(const Vector2i &resolutionHint = Vector2i(-1, -1)) const;
+	virtual ref<Bitmap> getBitmap(const Vector2i &sizeHint = Vector2i(-1, -1)) const;
 
 	MTS_DECLARE_CLASS()
 protected:
diff --git a/src/libhw/basicshader.cpp b/src/libhw/basicshader.cpp
index 8aa8b86d..5b9e53ba 100644
--- a/src/libhw/basicshader.cpp
+++ b/src/libhw/basicshader.cpp
@@ -32,7 +32,7 @@ void ConstantSpectrumTexture::serialize(Stream *stream, InstanceManager *manager
 	m_value.serialize(stream);
 }
 
-ref<Bitmap> ConstantSpectrumTexture::getBitmap(const Vector2i &resolutionHint) const {
+ref<Bitmap> ConstantSpectrumTexture::getBitmap(const Vector2i &sizeHint) const {
 	ref<Bitmap> result = new Bitmap(Bitmap::ESpectrum, Bitmap::EFloat, Vector2i(1, 1));
 	*((Spectrum *) result->getFloatData()) = m_value;
 	return result;
@@ -48,7 +48,7 @@ void ConstantFloatTexture::serialize(Stream *stream, InstanceManager *manager) c
 	stream->writeFloat(m_value);
 }
 
-ref<Bitmap> ConstantFloatTexture::getBitmap(const Vector2i &resolutionHint) const {
+ref<Bitmap> ConstantFloatTexture::getBitmap(const Vector2i &sizeHint) const {
 	ref<Bitmap> result = new Bitmap(Bitmap::ELuminance, Bitmap::EFloat, Vector2i(1, 1));
 	*result->getFloatData() = m_value;
 	return result;
@@ -66,9 +66,9 @@ void SpectrumProductTexture::serialize(Stream *stream, InstanceManager *manager)
 	manager->serialize(stream, m_b.get());
 }
 
-ref<Bitmap> SpectrumProductTexture::getBitmap(const Vector2i &resolutionHint) const {
-	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
-	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+ref<Bitmap> SpectrumProductTexture::getBitmap(const Vector2i &sizeHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(sizeHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(sizeHint);
 	return Bitmap::arithmeticOperation(Bitmap::EMultiplication, bitmap1.get(), bitmap2.get());
 }
 
@@ -84,9 +84,9 @@ void SpectrumAdditionTexture::serialize(Stream *stream, InstanceManager *manager
 	manager->serialize(stream, m_b.get());
 }
 
-ref<Bitmap> SpectrumAdditionTexture::getBitmap(const Vector2i &resolutionHint) const {
-	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
-	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+ref<Bitmap> SpectrumAdditionTexture::getBitmap(const Vector2i &sizeHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(sizeHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(sizeHint);
 	return Bitmap::arithmeticOperation(Bitmap::EAddition, bitmap1.get(), bitmap2.get());
 }
 
@@ -102,9 +102,9 @@ void SpectrumSubtractionTexture::serialize(Stream *stream, InstanceManager *mana
 	manager->serialize(stream, m_b.get());
 }
 
-ref<Bitmap> SpectrumSubtractionTexture::getBitmap(const Vector2i &resolutionHint) const {
-	ref<Bitmap> bitmap1 = m_a->getBitmap(resolutionHint);
-	ref<Bitmap> bitmap2 = m_b->getBitmap(resolutionHint);
+ref<Bitmap> SpectrumSubtractionTexture::getBitmap(const Vector2i &sizeHint) const {
+	ref<Bitmap> bitmap1 = m_a->getBitmap(sizeHint);
+	ref<Bitmap> bitmap2 = m_b->getBitmap(sizeHint);
 	return Bitmap::arithmeticOperation(Bitmap::ESubtraction, bitmap1.get(), bitmap2.get());
 }
 
diff --git a/src/librender/texture.cpp b/src/librender/texture.cpp
index 2d99fcbd..4c203831 100644
--- a/src/librender/texture.cpp
+++ b/src/librender/texture.cpp
@@ -101,8 +101,8 @@ Spectrum Texture2D::eval(const Intersection &its, bool filter) const {
 	}
 }
 
-ref<Bitmap> Texture2D::getBitmap(const Vector2i &resolutionHint) const {
-	Vector2i res(resolutionHint);
+ref<Bitmap> Texture2D::getBitmap(const Vector2i &sizeHint) const {
+	Vector2i res(sizeHint);
 	if (res.x <= 0 || res.y <= 0)
 		res = Vector2i(32);
 
diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
new file mode 100644
index 00000000..505aac83
--- /dev/null
+++ b/src/shapes/heightfield.cpp
@@ -0,0 +1,399 @@
+/*
+    This file is part of Mitsuba, a physically based rendering system.
+
+    Copyright (c) 2007-2011 by Wenzel Jakob and others.
+
+    Mitsuba is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License Version 3
+    as published by the Free Software Foundation.
+
+    Mitsuba is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <mitsuba/render/shape.h>
+#include <mitsuba/render/bsdf.h>
+#include <mitsuba/render/trimesh.h>
+#include <mitsuba/render/texture.h>
+#include <mitsuba/core/bitmap.h>
+#include <mitsuba/core/timer.h>
+
+MTS_NAMESPACE_BEGIN
+
+/*!\plugin{heightfield}{Height field}
+ * \order{12}
+ *
+ * Developed by Milo\^{s} Ha\^{s}an
+ */
+class Heightfield : public Shape {
+public:
+	Heightfield(const Properties &props) : Shape(props), m_data(NULL) {
+		m_sizeHint = Vector2i(
+			props.getInteger("width", -1),
+			props.getInteger("height", -1)
+		);
+
+		m_objectToWorld = props.getTransform("toWorld", Transform());
+	}
+
+	Heightfield(Stream *stream, InstanceManager *manager)
+		: Shape(stream, manager), m_data(NULL) {
+	}
+
+	~Heightfield() {
+		if (m_data) {
+			freeAligned(m_data);
+			for (int i=0; i<m_levelCount; ++i)
+				freeAligned(m_minmax[i]);
+			delete[] m_minmax;
+			delete[] m_levelSize;
+		}
+    }
+
+	void serialize(Stream *stream, InstanceManager *manager) const {
+		Shape::serialize(stream, manager);
+	}
+
+	AABB getAABB() const {
+		AABB aabb(
+			Point3(0, 0, m_minmax[m_levelCount-1][0].min),
+			Point3(m_levelSize[0].x, m_levelSize[0].y, m_minmax[m_levelCount-1][0].max)
+		);
+
+		AABB result;
+		for (int i=0; i<8; ++i)
+			result.expandBy(m_objectToWorld(aabb.getCorner(i)));
+
+		return result;
+	}
+
+	Float getSurfaceArea() const {
+		return 0; /// XXX
+	}
+
+	size_t getPrimitiveCount() const {
+		return 1;
+	}
+
+	size_t getEffectivePrimitiveCount() const {
+		return (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y;
+	}
+
+	struct StackEntry {
+		int level;
+		int x, y;
+	};
+
+#if 0
+	bool rayIntersect(const Ray &_ray, Float mint, Float maxt, Float &t, void *tmp) const {
+		StackEntry stack[MTS_KD_MAXDEPTH];
+		Ray ray;
+		m_objectToWorld(_ray, ray);
+		int idx = 0;
+
+		stack[idx].level = m_levelCount-1;
+		stack[idx].x = 0;
+		stack[idx].y = 0;
+
+		while (idx >= 0) {
+			const StackEntry &entry    = stack[idx];
+			const Vector2i &size       = m_levelSize[entry.level];
+			const Vector2i &blockSize  = m_levelBlockSize[entry.level];
+			const Interval &interval   = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
+
+			AABB aabb(
+				Point3(entry.x * blockSize.x, entry.y * blockSize.y, interval.min),
+				Point3((entry.x + 1) * blockSize.x, (entry.y + 1) * blockSize.y, interval.max)
+			);
+
+			Float nearT, farT;
+			bool match = aabb.rayIntersect(ray, nearT, farT);
+
+			if (!match || farT < mint || nearT > maxt) {
+				--idx;
+				continue;
+			}
+
+			if (level == 0) {
+				t = mint;
+				return true;
+			} else {
+			}
+		}
+
+		return false;
+	}
+#endif
+
+	// =============================================================
+	//  Ray <-> Bilinear patch intersection computation
+	//  Based on the paper "Ray Bilinear Patch Intersections"
+    //  by Ramsey et al., JGT Volume 9, 3, 2004
+	// =============================================================
+
+	struct PatchRecord {
+		Point2 uv;
+		Point p;
+		int x, y;
+	};
+
+	inline Float computeU(Float v, Float A1, Float A2, Float B1, Float B2,
+			Float C1, Float C2, Float D1, Float D2) const {
+		Float a = v*A2+B2;
+		Float b = v*(A2-A1)+B2-B1;
+
+		if (std::abs(b) >= std::abs(a))
+			return (v*(C1-C2)+D1-D2)/b;
+		else
+			return -(v*C2+D2)/a;
+	}
+
+
+	bool rayIntersectPatch(const Ray &ray, int x, int y, Float mint, Float maxt, Float &t, void *tmp) const {
+		int width = m_levelSize[0].x;
+
+		const Float
+			f00 = m_data[x   + y     * width],
+			f01 = m_data[x   + (y+1) * width],
+			f10 = m_data[x+1 + y     * width],
+			f11 = m_data[x+1 + (y+1) * width];
+
+		/* Variables for substitution */
+		const Float
+			a  = f01 + f10 - f11 - f00,
+			b  = f00 - f10,
+			c  = f00 - f01,
+		    dx = x - ray.o.x,
+		    dy = y - ray.o.y,
+		    dz = f00 - ray.o.z,
+			A1 = a*ray.d.x,
+			A2 = a*ray.d.y,
+			B1 = ray.d.z + b*ray.d.x,
+			B2 = b*ray.d.y,
+			C1 = c*ray.d.x,
+			C2 = ray.d.z + c*ray.d.y,
+			D1 = dx*ray.d.z - dz*ray.d.x,
+			D2 = dy*ray.d.z - dz*ray.d.y;
+
+		/* Quadratic equation coefficients */
+		Float A = A2*C1 - A1*C2;
+		Float B = A2*D1 - A1*D2 + B2*C1 - B1*C2;
+		Float C = B2*D1 - B1*D2;
+
+		const Float inf = std::numeric_limits<Float>::infinity();
+		Float v0, v1;
+		if (!solveQuadratic(A, B, C, v0, v1))
+			return false;
+
+		Float u0=inf, u1=inf, t0=inf, t1=inf;
+		Point p0, p1;
+
+		/* Validate & find coordinates of first potential intersection */
+		if (v0 >= 0 && v0 <= 1) {
+			u0 = computeU(v0, A1, A2, B1, B2, C1, C2, D1, D2);
+			if (u0 >= 0 && u0 <= 1) {
+				p0 = Point(x+u0, y+v0, (1.0f - u0) * ((1.0f - v0) * f00 + v0 * f01)
+					+ u0 * ((1.0f - v0) * f10 + v0 * f11));
+				t0 = dot(p0-ray.o, ray.d);
+				if (t0 < mint || t0 > maxt)
+					t0 = inf;
+			}
+		}
+
+		/* Validate & find coordinates of second potential intersection */
+		if (v1 >= 0 && v1 <= 1 && v1 != v0) {
+			u1 = computeU(v1, A1, A2, B1, B2, C1, C2, D1, D2);
+			if (u1 >= 0 && u1 <= 1) {
+				p1 = Point(x+u1, y+v1, (1.0f - u1) * ((1.0f - v1) * f00 + v1 * f01)
+					+ u1 * ((1.0f - v1) * f10 + v1 * f11));
+				t1 = dot(p1-ray.o, ray.d);
+				if (t1 < mint || t1 > maxt)
+					t1 = inf;
+			}
+		}
+
+		if (t0 == inf && t1 == inf)
+			return false;
+
+		if (tmp) {
+			PatchRecord &temp = *((PatchRecord *) tmp);
+			if (t0 <= t1) {
+				t = t0;
+				temp.uv = Point2(u0, v0);
+				temp.p  = p0;
+				temp.x  = x;
+				temp.y  = y;
+			} else {
+				t = t1;
+				temp.uv = Point2(u1, v1);
+				temp.p  = p1;
+				temp.x  = x;
+				temp.y  = y;
+			}
+		}
+
+		return true;
+	}
+
+	void fillIntersectionRecord(const Ray &ray,
+			const void *tmp, Intersection &its) const {
+		PatchRecord &temp = *((PatchRecord *) tmp);
+
+		int x = temp.x, y = temp.y, width = m_levelSize[0].x;
+		Float
+			f00 = m_data[x   + y     * width],
+			f01 = m_data[x   + (y+1) * width],
+			f10 = m_data[x+1 + y     * width],
+			f11 = m_data[x+1 + (y+1) * width];
+
+		its.uv = Point2((x+temp.uv.x) * m_invSize.x, (y+temp.uv.y) * m_invSize.y);
+		its.p  = temp.p;
+
+		its.dpdu = Vector(1, 0, (1.0f - its.uv.y) * (f10 - f00) + its.uv.y * (f11 - f01));
+		its.dpdv = Vector(0, 1, (1.0f - its.uv.x) * (f01 - f00) + its.uv.x * (f11 - f10));
+		its.geoFrame.n = cross(its.dpdu, its.dpdv);
+		its.geoFrame.s = normalize(its.dpdu);
+		its.geoFrame.t = normalize(its.dpdv);
+
+		its.shFrame = its.geoFrame;
+		its.shape = this;
+
+ 		its.wi = its.toLocal(-ray.d);
+ 		its.hasUVPartials = false;
+		its.instance = NULL;
+		its.time = ray.time;
+	}
+
+	bool rayIntersect(const Ray &ray, Float mint, Float maxt, Float &t, void *tmp) const {
+		for (int y=0; y<m_levelSize[0].y-1; ++y) {
+			for (int x=0; x<m_levelSize[0].x-1; ++x) {
+				if (rayIntersectPatch(ray, x, y, mint, maxt, t, tmp))
+					return true;
+			}
+		}
+		return false;
+	}
+
+	bool rayIntersect(const Ray &ray, Float mint, Float maxt) const {
+		Float t;
+		return rayIntersect(ray, mint, maxt, t, NULL);
+	}
+
+	void addChild(const std::string &name, ConfigurableObject *child) {
+		const Class *cClass = child->getClass();
+		if (cClass->derivesFrom(Texture::m_theClass)) {
+			ref<Bitmap> bitmap = static_cast<Texture *>(child)->getBitmap(m_sizeHint);
+
+			Vector2i size = bitmap->getSize();
+			if (size.x < 2) size.x = 2;
+			if (size.y < 2) size.y = 2;
+			if (!isPowerOfTwo(size.x)) size.x = (int) roundToPowerOfTwo((uint32_t) size.x);
+			if (!isPowerOfTwo(size.y)) size.y = (int) roundToPowerOfTwo((uint32_t) size.y);
+			m_sizeF = Vector2(size);
+			m_invSizeF = Vector2(1/size.x, 1/size.y);
+
+			if (bitmap->getSize() != size) {
+				Log(EWarn, "Heightfield texture size should be at least 2x2 and "
+					"a power of two. Resampling from %ix%i to %ix%i..",
+					bitmap->getWidth(), bitmap->getHeight(), size.x, size.y);
+
+				bitmap = bitmap->resample(NULL, ReconstructionFilter::EClamp,
+					ReconstructionFilter::EClamp, size,
+					-std::numeric_limits<Float>::infinity(),
+					std::numeric_limits<Float>::infinity());
+			}
+
+			m_data = (Float *) allocAligned((size_t) size.x * (size_t) size.y * sizeof(Float));
+			bitmap->convert(m_data, Bitmap::ELuminance, Bitmap::EFloat);
+
+			Log(EInfo, "Building acceleration data structure for %ix%i height field ..", size.x, size.y);
+
+			ref<Timer> timer = new Timer();
+			m_levelCount = (int) std::max(log2i((uint32_t) size.x), log2i((uint32_t) size.y)) + 1;
+
+			m_levelSize = new Vector2i[m_levelCount];
+			m_minmax = new Interval*[m_levelCount];
+			m_levelSize[0] = size;
+			m_minmax[0] = NULL;
+			size_t totalStorage = (size_t) size.x * (size_t) size.y * sizeof(float);
+			for (int i=0; m_levelSize[i].x > 1 || m_levelSize[i].y > 1; ++i) {
+				m_levelSize[i+1].x = m_levelSize[i].x > 1 ? (m_levelSize[i].x / 2) : 1;
+				m_levelSize[i+1].y = m_levelSize[i].y > 1 ? (m_levelSize[i].y / 2) : 1;
+				size_t size = (size_t) m_levelSize[i+1].x * (size_t) m_levelSize[i+1].y * sizeof(Interval);
+				m_minmax[i+1] = (Interval *) allocAligned(size);
+				totalStorage += size;
+			}
+
+			/* Build the lowest MIP layer directly from the heightfield data */
+			Interval *bounds = m_minmax[1];
+			for (int y=0; y<m_levelSize[1].y; ++y) {
+				int y0 = std::min(2*y,   m_levelSize[0].y-1),
+					y1 = std::min(2*y+1, m_levelSize[0].y-1);
+				for (int x=0; x<m_levelSize[1].x; ++x) {
+					int x0 = std::min(2*x,   m_levelSize[0].x-1),
+						x1 = std::min(2*x+1, m_levelSize[0].x-1);
+					Float v00 = m_data[y0 * m_levelSize[0].x + x0];
+					Float v01 = m_data[y0 * m_levelSize[0].x + x1];
+					Float v10 = m_data[y1 * m_levelSize[0].x + x0];
+					Float v11 = m_data[y1 * m_levelSize[0].x + x1];
+					Float vmin = std::min(std::min(v00, v01), std::min(v10, v11));
+					Float vmax = std::max(std::max(v00, v01), std::max(v10, v11));
+					*bounds++ = Interval(vmin, vmax);
+				}
+			}
+
+			/* Propagate height bounds upwards to the other layers */
+			for (int level=2; level<m_levelCount; ++level) {
+				const Vector2i &cur  = m_levelSize[level],
+					           &prev = m_levelSize[level-1];
+				Interval *curBounds  = m_minmax[level];
+				Interval *prevBounds = m_minmax[level-1];
+				for (int y=0; y<cur.y; ++y) {
+					int y0 = std::min(2*y,   prev.y-1),
+						y1 = std::min(2*y+1, prev.y-1);
+					for (int x=0; x<cur.x; ++x) {
+						int x0 = std::min(2*x,   prev.x-1),
+							x1 = std::min(2*x+1, prev.x-1);
+						const Interval &v00 = prevBounds[y0 * prev.x + x0], &v01 = prevBounds[y0 * prev.x + x1];
+						const Interval &v10 = prevBounds[y1 * prev.x + x0], &v11 = prevBounds[y1 * prev.x + x1];
+						Interval combined(v00);
+						combined.expandBy(v01);
+						combined.expandBy(v10);
+						combined.expandBy(v11);
+						*curBounds++ = combined;
+					}
+				}
+			}
+			Log(EInfo, "Done (took %i ms, %s)", timer->getMilliseconds(),
+					memString(totalStorage).c_str());
+		} else {
+			Shape::addChild(name, child);
+		}
+	}
+
+	std::string toString() const {
+		std::ostringstream oss;
+		oss << "Heightfield[" << endl
+			<< "]";
+		return oss.str();
+	}
+
+	MTS_DECLARE_CLASS()
+private:
+	Vector2i m_sizeHint, *m_levelSize, *m_levelBlockSize;
+	Vector2 m_sizeF, m_invSizeF;
+	int m_levelCount;
+	Float *m_data;
+	Interval **m_minmax;
+	Transform m_objectToWorld;
+};
+
+MTS_IMPLEMENT_CLASS_S(Heightfield, false, Shape)
+MTS_EXPORT_PLUGIN(Heightfield, "Height field intersection primitive");
+MTS_NAMESPACE_END
+
diff --git a/src/textures/bitmap.cpp b/src/textures/bitmap.cpp
index da5b45b1..d20805be 100644
--- a/src/textures/bitmap.cpp
+++ b/src/textures/bitmap.cpp
@@ -428,7 +428,7 @@ public:
 		return result;
 	}
 
-	ref<Bitmap> getBitmap(const Vector2i &/* unused */) {
+	ref<Bitmap> getBitmap(const Vector2i &/* unused */) const {
 		return m_mipmap1.get() ? m_mipmap1->toBitmap() : m_mipmap3->toBitmap();
 	}
 
diff --git a/src/textures/scale.cpp b/src/textures/scale.cpp
index 852dbab8..94270396 100644
--- a/src/textures/scale.cpp
+++ b/src/textures/scale.cpp
@@ -37,6 +37,7 @@ MTS_NAMESPACE_BEGIN
  * contents by a user-specified value. This can be quite useful when a
  * texture is too dark or too bright. The plugin can also be used to adjust
  * the height of a bump map when using the \pluginref{bump} plugin.
+ *
  * \begin{xml}[caption=Scaling the contents of a bitmap texture]
  * <texture type="scale">
  *     <float name="scale" value="0.5"/>
@@ -46,7 +47,6 @@ MTS_NAMESPACE_BEGIN
  *     </texture>
  * </texture>
  * \end{xml}
-
  */
 
 class ScalingTexture : public Texture {
@@ -102,6 +102,23 @@ public:
 		return m_nested->isConstant();
 	}
 
+	ref<Bitmap> getBitmap(const Vector2i &sizeHint) const {
+		ref<Bitmap> result = m_nested->getBitmap(sizeHint);
+
+		if (m_scale == Spectrum(m_scale[0])) {
+			result->scale(m_scale[0]);
+		} else {
+			result = result->convert(Bitmap::ESpectrum, Bitmap::EFloat);
+
+			Spectrum *data = (Spectrum *) result->getFloatData();
+			size_t pixelCount = result->getPixelCount();
+			for (size_t i=0; i<pixelCount; ++i)
+				*data++ *= m_scale;
+		}
+
+		return result;
+	}
+
 	std::string toString() const {
 		std::ostringstream oss;
 		oss << "ScalingTexture[" << endl

From 36d4bc7ea204b4af34dd24231069d276ca9340ff Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 6 Sep 2013 21:22:46 +0200
Subject: [PATCH 12/43] committed nonfunctional version for now

---
 src/shapes/heightfield.cpp | 362 +++++++++++++++++--------------------
 1 file changed, 168 insertions(+), 194 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 505aac83..c88a436a 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -23,13 +23,24 @@
 #include <mitsuba/core/bitmap.h>
 #include <mitsuba/core/timer.h>
 
+
+#define MTS_QTREE_MAXDEPTH 24 /* Up to 16M x 16M */
+
+		/* TODO: special case for perpendicular rays */
 MTS_NAMESPACE_BEGIN
 
-/*!\plugin{heightfield}{Height field}
- * \order{12}
- *
- * Developed by Milo\^{s} Ha\^{s}an
- */
+namespace {
+	/// Find the smallest t >= 0 such that a*t + b is a multiple of c
+	inline Float nextMultiple(Float a, Float b, Float c) {
+		if (a == 0)
+			return std::numeric_limits<Float>::infinity();
+		else if (a > 0)
+			return (std::ceil(b/c)*c - b) / a;
+		else
+			return (std::floor(b/c)*c - b) / a;
+	}
+};
+
 class Heightfield : public Shape {
 public:
 	Heightfield(const Properties &props) : Shape(props), m_data(NULL) {
@@ -52,6 +63,7 @@ public:
 				freeAligned(m_minmax[i]);
 			delete[] m_minmax;
 			delete[] m_levelSize;
+			delete[] m_blockSizeF;
 		}
     }
 
@@ -60,14 +72,9 @@ public:
 	}
 
 	AABB getAABB() const {
-		AABB aabb(
-			Point3(0, 0, m_minmax[m_levelCount-1][0].min),
-			Point3(m_levelSize[0].x, m_levelSize[0].y, m_minmax[m_levelCount-1][0].max)
-		);
-
 		AABB result;
 		for (int i=0; i<8; ++i)
-			result.expandBy(m_objectToWorld(aabb.getCorner(i)));
+			result.expandBy(m_objectToWorld(m_dataAABB.getCorner(i)));
 
 		return result;
 	}
@@ -83,31 +90,58 @@ public:
 	size_t getEffectivePrimitiveCount() const {
 		return (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y;
 	}
-
 	struct StackEntry {
-		int level;
-		int x, y;
+		uint32_t level;
+		uint32_t x, y;
+
+		inline std::string toString() const {
+			std::ostringstream oss;
+			oss << "StackEntry[level=" << level << ", x=" << x << ", y=" << y << "]" << endl;
+			return oss.str();
+		}
 	};
 
-#if 0
 	bool rayIntersect(const Ray &_ray, Float mint, Float maxt, Float &t, void *tmp) const {
-		StackEntry stack[MTS_KD_MAXDEPTH];
+		StackEntry stack[MTS_QTREE_MAXDEPTH];
+
+		/* Transform ray into object space */
 		Ray ray;
-		m_objectToWorld(_ray, ray);
-		int idx = 0;
+		m_objectToWorld.inverse()(_ray, ray);
 
-		stack[idx].level = m_levelCount-1;
-		stack[idx].x = 0;
-		stack[idx].y = 0;
+		/* Clamp ray to the bounding box */
+	    Float nearT, farT;
+	    if (!m_dataAABB.rayIntersect(ray, nearT, farT))
+			return false;
+		if (nearT > mint) mint = nearT;
+		if (farT  < maxt) maxt = farT;
+		if (maxt <= mint) return false;
 
-		while (idx >= 0) {
-			const StackEntry &entry    = stack[idx];
-			const Vector2i &size       = m_levelSize[entry.level];
-			const Vector2i &blockSize  = m_levelBlockSize[entry.level];
-			const Interval &interval   = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
+		/* Rescale the ray so that it has unit length in 2D */
+//		Float length2D = hypot2(ray.d.x * (maxt-mint), ray.d.y * (maxt-mint));
+//		ray.d /= length2D; ray.dRcp *= length2D;
 
+		/* Ray length to cross a single cell along the X or Y axis */
+		Float tDeltaXSingle  = std::abs(ray.dRcp.x),
+		      tDeltaYSingle  = std::abs(ray.dRcp.y);
+
+		/* Cell coordinate increments for steps along the ray */
+		int iDeltaX = (int) signum(ray.d.x),
+			iDeltaY = (int) signum(ray.d.y);
+
+		int stackIdx = 0;
+		stack[stackIdx].level = m_levelCount-1;
+		stack[stackIdx].x = 0;
+		stack[stackIdx].y = 0;
+
+		while (stackIdx >= 0) {
+			StackEntry entry         = stack[stackIdx];
+			const Vector2i &size     = m_levelSize[entry.level];
+			const Interval &interval = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
+			const Vector2 &blockSize = m_blockSizeF[entry.level];
+
+			/* Intersect against the current min-max quadtree node */
 			AABB aabb(
-				Point3(entry.x * blockSize.x, entry.y * blockSize.y, interval.min),
+				Point3(entry.x       * blockSize.x, entry.y       * blockSize.y, interval.min),
 				Point3((entry.x + 1) * blockSize.x, (entry.y + 1) * blockSize.y, interval.max)
 			);
 
@@ -115,131 +149,62 @@ public:
 			bool match = aabb.rayIntersect(ray, nearT, farT);
 
 			if (!match || farT < mint || nearT > maxt) {
-				--idx;
+				/* Miss -> pop the stack */
+				--stackIdx;
 				continue;
 			}
 
-			if (level == 0) {
-				t = mint;
-				return true;
+			if (entry.level > 0) {
+				/* Inner node -- push child nodes in 2D DDA order */
+				const Vector2 &subBlockSize = m_blockSizeF[entry.level+1];
+				Point2 p(ray.o.x + ray.d.x * nearT,
+				         ray.o.y + ray.d.y * nearT);
+
+				int nextLevel = entry.level + 1;
+				int x = p.x - aabb.min.x >= subBlockSize.x ? 1 : 0;
+				int y = p.y - aabb.min.y >= subBlockSize.y ? 1 : 0;
+
+				floorToInt(p.x / subBlockSize.x); /// Uh oh, this can't work
+				int y = floorToInt(p.y / subBlockSize.y);
+
+				Float tDeltaX = tDeltaXSingle * subBlockSize.x,
+				      tDeltaY = tDeltaYSingle * subBlockSize.y,
+					  tNextX  = nextMultiple(ray.d.x, p.x, subBlockSize.x),
+					  tNextY  = nextMultiple(ray.d.y, p.y, subBlockSize.y);
+
+				Float t = 0;
+				while (t < maxt) {
+					if (tNextX < tNextY) {
+						tNextX += tDeltaX;
+						x += iDeltaX;
+					} else {
+						tNextY += tDeltaY;
+						y += iDeltaY;
+					}
+
+					
+
+					stack[stackIdx+1].level = entry.level + 1;
+					stack[stackIdx+1].x = x;
+					stack[stackIdx+1].y = y;
+					++stackIdx;
+				}
 			} else {
+				cout << "X(" << entry.x + 1 << ", " << entry.y+1 << ") = 1;" << endl;
+				--stackIdx;
 			}
 		}
 
 		return false;
 	}
-#endif
-
-	// =============================================================
-	//  Ray <-> Bilinear patch intersection computation
-	//  Based on the paper "Ray Bilinear Patch Intersections"
-    //  by Ramsey et al., JGT Volume 9, 3, 2004
-	// =============================================================
 
+#if 0
 	struct PatchRecord {
 		Point2 uv;
 		Point p;
 		int x, y;
 	};
 
-	inline Float computeU(Float v, Float A1, Float A2, Float B1, Float B2,
-			Float C1, Float C2, Float D1, Float D2) const {
-		Float a = v*A2+B2;
-		Float b = v*(A2-A1)+B2-B1;
-
-		if (std::abs(b) >= std::abs(a))
-			return (v*(C1-C2)+D1-D2)/b;
-		else
-			return -(v*C2+D2)/a;
-	}
-
-
-	bool rayIntersectPatch(const Ray &ray, int x, int y, Float mint, Float maxt, Float &t, void *tmp) const {
-		int width = m_levelSize[0].x;
-
-		const Float
-			f00 = m_data[x   + y     * width],
-			f01 = m_data[x   + (y+1) * width],
-			f10 = m_data[x+1 + y     * width],
-			f11 = m_data[x+1 + (y+1) * width];
-
-		/* Variables for substitution */
-		const Float
-			a  = f01 + f10 - f11 - f00,
-			b  = f00 - f10,
-			c  = f00 - f01,
-		    dx = x - ray.o.x,
-		    dy = y - ray.o.y,
-		    dz = f00 - ray.o.z,
-			A1 = a*ray.d.x,
-			A2 = a*ray.d.y,
-			B1 = ray.d.z + b*ray.d.x,
-			B2 = b*ray.d.y,
-			C1 = c*ray.d.x,
-			C2 = ray.d.z + c*ray.d.y,
-			D1 = dx*ray.d.z - dz*ray.d.x,
-			D2 = dy*ray.d.z - dz*ray.d.y;
-
-		/* Quadratic equation coefficients */
-		Float A = A2*C1 - A1*C2;
-		Float B = A2*D1 - A1*D2 + B2*C1 - B1*C2;
-		Float C = B2*D1 - B1*D2;
-
-		const Float inf = std::numeric_limits<Float>::infinity();
-		Float v0, v1;
-		if (!solveQuadratic(A, B, C, v0, v1))
-			return false;
-
-		Float u0=inf, u1=inf, t0=inf, t1=inf;
-		Point p0, p1;
-
-		/* Validate & find coordinates of first potential intersection */
-		if (v0 >= 0 && v0 <= 1) {
-			u0 = computeU(v0, A1, A2, B1, B2, C1, C2, D1, D2);
-			if (u0 >= 0 && u0 <= 1) {
-				p0 = Point(x+u0, y+v0, (1.0f - u0) * ((1.0f - v0) * f00 + v0 * f01)
-					+ u0 * ((1.0f - v0) * f10 + v0 * f11));
-				t0 = dot(p0-ray.o, ray.d);
-				if (t0 < mint || t0 > maxt)
-					t0 = inf;
-			}
-		}
-
-		/* Validate & find coordinates of second potential intersection */
-		if (v1 >= 0 && v1 <= 1 && v1 != v0) {
-			u1 = computeU(v1, A1, A2, B1, B2, C1, C2, D1, D2);
-			if (u1 >= 0 && u1 <= 1) {
-				p1 = Point(x+u1, y+v1, (1.0f - u1) * ((1.0f - v1) * f00 + v1 * f01)
-					+ u1 * ((1.0f - v1) * f10 + v1 * f11));
-				t1 = dot(p1-ray.o, ray.d);
-				if (t1 < mint || t1 > maxt)
-					t1 = inf;
-			}
-		}
-
-		if (t0 == inf && t1 == inf)
-			return false;
-
-		if (tmp) {
-			PatchRecord &temp = *((PatchRecord *) tmp);
-			if (t0 <= t1) {
-				t = t0;
-				temp.uv = Point2(u0, v0);
-				temp.p  = p0;
-				temp.x  = x;
-				temp.y  = y;
-			} else {
-				t = t1;
-				temp.uv = Point2(u1, v1);
-				temp.p  = p1;
-				temp.x  = x;
-				temp.y  = y;
-			}
-		}
-
-		return true;
-	}
-
 	void fillIntersectionRecord(const Ray &ray,
 			const void *tmp, Intersection &its) const {
 		PatchRecord &temp = *((PatchRecord *) tmp);
@@ -268,16 +233,7 @@ public:
 		its.instance = NULL;
 		its.time = ray.time;
 	}
-
-	bool rayIntersect(const Ray &ray, Float mint, Float maxt, Float &t, void *tmp) const {
-		for (int y=0; y<m_levelSize[0].y-1; ++y) {
-			for (int x=0; x<m_levelSize[0].x-1; ++x) {
-				if (rayIntersectPatch(ray, x, y, mint, maxt, t, tmp))
-					return true;
-			}
-		}
-		return false;
-	}
+#endif
 
 	bool rayIntersect(const Ray &ray, Float mint, Float maxt) const {
 		Float t;
@@ -289,58 +245,50 @@ public:
 		if (cClass->derivesFrom(Texture::m_theClass)) {
 			ref<Bitmap> bitmap = static_cast<Texture *>(child)->getBitmap(m_sizeHint);
 
-			Vector2i size = bitmap->getSize();
-			if (size.x < 2) size.x = 2;
-			if (size.y < 2) size.y = 2;
-			if (!isPowerOfTwo(size.x)) size.x = (int) roundToPowerOfTwo((uint32_t) size.x);
-			if (!isPowerOfTwo(size.y)) size.y = (int) roundToPowerOfTwo((uint32_t) size.y);
-			m_sizeF = Vector2(size);
-			m_invSizeF = Vector2(1/size.x, 1/size.y);
+			m_dataSize = bitmap->getSize();
+			if (m_dataSize.x < 2) m_dataSize.x = 2;
+			if (m_dataSize.y < 2) m_dataSize.y = 2;
+			if (!isPowerOfTwo(m_dataSize.x - 1)) m_dataSize.x = (int) roundToPowerOfTwo((uint32_t) m_dataSize.x - 1) + 1;
+			if (!isPowerOfTwo(m_dataSize.y - 1)) m_dataSize.y = (int) roundToPowerOfTwo((uint32_t) m_dataSize.y - 1) + 1;
 
-			if (bitmap->getSize() != size) {
-				Log(EWarn, "Heightfield texture size should be at least 2x2 and "
-					"a power of two. Resampling from %ix%i to %ix%i..",
-					bitmap->getWidth(), bitmap->getHeight(), size.x, size.y);
+			if (bitmap->getSize() != m_dataSize) {
+				Log(EInfo, "Resampling heightfield texture from %ix%i to %ix%i..",
+					bitmap->getWidth(), bitmap->getHeight(), m_dataSize.x, m_dataSize.y);
 
 				bitmap = bitmap->resample(NULL, ReconstructionFilter::EClamp,
-					ReconstructionFilter::EClamp, size,
+					ReconstructionFilter::EClamp, m_dataSize,
 					-std::numeric_limits<Float>::infinity(),
 					std::numeric_limits<Float>::infinity());
 			}
 
-			m_data = (Float *) allocAligned((size_t) size.x * (size_t) size.y * sizeof(Float));
+			size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Float),
+			       storageSize = size;
+			m_data = (Float *) allocAligned(size);
 			bitmap->convert(m_data, Bitmap::ELuminance, Bitmap::EFloat);
 
-			Log(EInfo, "Building acceleration data structure for %ix%i height field ..", size.x, size.y);
+			Log(EInfo, "Building acceleration data structure for %ix%i height field ..", m_dataSize.x, m_dataSize.y);
 
 			ref<Timer> timer = new Timer();
-			m_levelCount = (int) std::max(log2i((uint32_t) size.x), log2i((uint32_t) size.y)) + 1;
+			m_levelCount = (int) std::max(log2i((uint32_t) m_dataSize.x-1), log2i((uint32_t) m_dataSize.y-1)) + 1;
 
 			m_levelSize = new Vector2i[m_levelCount];
+			m_blockSizeF = new Vector2[m_levelCount];
 			m_minmax = new Interval*[m_levelCount];
-			m_levelSize[0] = size;
-			m_minmax[0] = NULL;
-			size_t totalStorage = (size_t) size.x * (size_t) size.y * sizeof(float);
-			for (int i=0; m_levelSize[i].x > 1 || m_levelSize[i].y > 1; ++i) {
-				m_levelSize[i+1].x = m_levelSize[i].x > 1 ? (m_levelSize[i].x / 2) : 1;
-				m_levelSize[i+1].y = m_levelSize[i].y > 1 ? (m_levelSize[i].y / 2) : 1;
-				size_t size = (size_t) m_levelSize[i+1].x * (size_t) m_levelSize[i+1].y * sizeof(Interval);
-				m_minmax[i+1] = (Interval *) allocAligned(size);
-				totalStorage += size;
-			}
+
+			m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
+			m_blockSizeF[0] = Vector2(1, 1);
+			size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
+			m_minmax[0] = (Interval *) allocAligned(size);
+			storageSize += size;
 
 			/* Build the lowest MIP layer directly from the heightfield data */
-			Interval *bounds = m_minmax[1];
-			for (int y=0; y<m_levelSize[1].y; ++y) {
-				int y0 = std::min(2*y,   m_levelSize[0].y-1),
-					y1 = std::min(2*y+1, m_levelSize[0].y-1);
-				for (int x=0; x<m_levelSize[1].x; ++x) {
-					int x0 = std::min(2*x,   m_levelSize[0].x-1),
-						x1 = std::min(2*x+1, m_levelSize[0].x-1);
-					Float v00 = m_data[y0 * m_levelSize[0].x + x0];
-					Float v01 = m_data[y0 * m_levelSize[0].x + x1];
-					Float v10 = m_data[y1 * m_levelSize[0].x + x0];
-					Float v11 = m_data[y1 * m_levelSize[0].x + x1];
+			Interval *bounds = m_minmax[0];
+			for (int y=0; y<m_levelSize[0].y; ++y) {
+				for (int x=0; x<m_levelSize[0].x; ++x) {
+					Float v00 = m_data[y * m_levelSize[0].x + x];
+					Float v01 = m_data[y * m_levelSize[0].x + x + 1];
+					Float v10 = m_data[(y + 1) * m_levelSize[0].x + x];
+					Float v11 = m_data[(y + 1) * m_levelSize[0].x + x + 1];
 					Float vmin = std::min(std::min(v00, v01), std::min(v10, v11));
 					Float vmax = std::max(std::max(v00, v01), std::max(v10, v11));
 					*bounds++ = Interval(vmin, vmax);
@@ -348,11 +296,25 @@ public:
 			}
 
 			/* Propagate height bounds upwards to the other layers */
-			for (int level=2; level<m_levelCount; ++level) {
-				const Vector2i &cur  = m_levelSize[level],
-					           &prev = m_levelSize[level-1];
-				Interval *curBounds  = m_minmax[level];
-				Interval *prevBounds = m_minmax[level-1];
+			for (int level=1; level<m_levelCount; ++level) {
+				Vector2i &cur  = m_levelSize[level],
+				         &prev = m_levelSize[level-1];
+
+				/* Calculate size of this layer */
+				cur.x = prev.x > 1 ? (prev.x / 2) : 1;
+				cur.y = prev.y > 1 ? (prev.y / 2) : 1;
+				m_blockSizeF[level] = Vector2(
+					m_levelSize[0].x / cur.x,
+					m_levelSize[0].y / cur.y
+				);
+
+				/* Allocate memory for interval data */
+				Interval *prevBounds = m_minmax[level-1], *curBounds;
+				size_t size = (size_t) cur.x * (size_t) cur.y * sizeof(Interval);
+				m_minmax[level] = curBounds = (Interval *) allocAligned(size);
+				storageSize += size;
+
+				/* Build by querying the previous layer */
 				for (int y=0; y<cur.y; ++y) {
 					int y0 = std::min(2*y,   prev.y-1),
 						y1 = std::min(2*y+1, prev.y-1);
@@ -369,8 +331,13 @@ public:
 					}
 				}
 			}
-			Log(EInfo, "Done (took %i ms, %s)", timer->getMilliseconds(),
-					memString(totalStorage).c_str());
+			Log(EInfo, "Done (took %i ms, uses %s of memory)", timer->getMilliseconds(),
+					memString(storageSize).c_str());
+
+			m_dataAABB = AABB(
+				Point3(0, 0, m_minmax[m_levelCount-1][0].min),
+				Point3(m_levelSize[0].x, m_levelSize[0].y, m_minmax[m_levelCount-1][0].max)
+			);
 		} else {
 			Shape::addChild(name, child);
 		}
@@ -385,12 +352,19 @@ public:
 
 	MTS_DECLARE_CLASS()
 private:
-	Vector2i m_sizeHint, *m_levelSize, *m_levelBlockSize;
-	Vector2 m_sizeF, m_invSizeF;
-	int m_levelCount;
-	Float *m_data;
-	Interval **m_minmax;
 	Transform m_objectToWorld;
+	Vector2i m_sizeHint;
+	AABB m_dataAABB;
+
+	/* Height field data */
+	Float *m_data;
+	Vector2i m_dataSize;
+
+	/* Min-max quadtree data */
+	int m_levelCount;
+	Vector2i *m_levelSize;
+	Vector2 *m_blockSizeF;
+	Interval **m_minmax;
 };
 
 MTS_IMPLEMENT_CLASS_S(Heightfield, false, Shape)

From c77db14313f63f4ddea786c0a17d5cfa7ba13ab2 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 6 Sep 2013 22:15:41 +0200
Subject: [PATCH 13/43] typo fix

---
 include/mitsuba/core/ray.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mitsuba/core/ray.h b/include/mitsuba/core/ray.h
index ae1a81d1..669d997f 100644
--- a/include/mitsuba/core/ray.h
+++ b/include/mitsuba/core/ray.h
@@ -98,7 +98,7 @@ template <typename _PointType, typename _VectorType> struct TRay {
 	/// Set the origin
 	inline void setOrigin(const PointType &pos) { o = pos; }
 
-	/// Set the origin
+	/// Set the time
 	inline void setTime(Scalar tval) { time = tval; }
 
 	/// Set the direction and update the reciprocal

From 960e16dd4c540c5ec898af6f82ba1b7fd19a90bc Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Mon, 9 Sep 2013 11:24:46 +0200
Subject: [PATCH 14/43] more work on heightfield primitive

---
 src/shapes/heightfield.cpp | 143 +++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 45 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index c88a436a..de691b24 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -23,10 +23,8 @@
 #include <mitsuba/core/bitmap.h>
 #include <mitsuba/core/timer.h>
 
-
 #define MTS_QTREE_MAXDEPTH 24 /* Up to 16M x 16M */
 
-		/* TODO: special case for perpendicular rays */
 MTS_NAMESPACE_BEGIN
 
 namespace {
@@ -63,6 +61,7 @@ public:
 				freeAligned(m_minmax[i]);
 			delete[] m_minmax;
 			delete[] m_levelSize;
+			delete[] m_numChildren;
 			delete[] m_blockSizeF;
 		}
     }
@@ -80,7 +79,7 @@ public:
 	}
 
 	Float getSurfaceArea() const {
-		return 0; /// XXX
+		return m_surfaceArea; /// XXX transformed surface area? ...
 	}
 
 	size_t getPrimitiveCount() const {
@@ -91,8 +90,8 @@ public:
 		return (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y;
 	}
 	struct StackEntry {
-		uint32_t level;
-		uint32_t x, y;
+		int level;
+		int x, y;
 
 		inline std::string toString() const {
 			std::ostringstream oss;
@@ -101,6 +100,12 @@ public:
 		}
 	};
 
+	struct PatchRecord {
+		Point2 uv;
+		Point p;
+		int x, y;
+	};
+
 	bool rayIntersect(const Ray &_ray, Float mint, Float maxt, Float &t, void *tmp) const {
 		StackEntry stack[MTS_QTREE_MAXDEPTH];
 
@@ -108,13 +113,7 @@ public:
 		Ray ray;
 		m_objectToWorld.inverse()(_ray, ray);
 
-		/* Clamp ray to the bounding box */
-	    Float nearT, farT;
-	    if (!m_dataAABB.rayIntersect(ray, nearT, farT))
-			return false;
-		if (nearT > mint) mint = nearT;
-		if (farT  < maxt) maxt = farT;
-		if (maxt <= mint) return false;
+		cout << "Tracing ray " << ray.toString() << maxt << endl;
 
 		/* Rescale the ray so that it has unit length in 2D */
 //		Float length2D = hypot2(ray.d.x * (maxt-mint), ray.d.y * (maxt-mint));
@@ -128,17 +127,26 @@ public:
 		int iDeltaX = (int) signum(ray.d.x),
 			iDeltaY = (int) signum(ray.d.y);
 
+		if (iDeltaX == 0 && iDeltaY == 0) {
+			/* TODO: special case for perpendicular rays. Also, provide int version of signum */
+			return false;
+		}
+
+		cout << "tDeltaSingle=" << tDeltaXSingle << ", " << tDeltaYSingle << endl;
+		cout << "iDeltaX=" << iDeltaX << ", " << iDeltaY << endl;
+
 		int stackIdx = 0;
 		stack[stackIdx].level = m_levelCount-1;
 		stack[stackIdx].x = 0;
 		stack[stackIdx].y = 0;
 
 		while (stackIdx >= 0) {
-			StackEntry entry         = stack[stackIdx];
-			const Vector2i &size     = m_levelSize[entry.level];
+			StackEntry entry         = stack[stackIdx--];
 			const Interval &interval = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
 			const Vector2 &blockSize = m_blockSizeF[entry.level];
 
+			cout << endl << "Processing stack entry: " << entry.toString() << endl;
+
 			/* Intersect against the current min-max quadtree node */
 			AABB aabb(
 				Point3(entry.x       * blockSize.x, entry.y       * blockSize.y, interval.min),
@@ -147,51 +155,90 @@ public:
 
 			Float nearT, farT;
 			bool match = aabb.rayIntersect(ray, nearT, farT);
-
-			if (!match || farT < mint || nearT > maxt) {
-				/* Miss -> pop the stack */
-				--stackIdx;
+			if (!match) {
+				cout << "Miss (1)!" << endl;
 				continue;
 			}
 
+			nearT = std::max(nearT, mint); farT = std::min(farT, maxt);
+			if (nearT > farT) {
+				cout << "Miss (2), nearT=" << nearT << ", farT=" << farT << "!" << endl;
+				continue;
+			}
+
+			Point2 enter(ray.o.x + ray.d.x * nearT - aabb.min.x,
+				         ray.o.y + ray.d.y * nearT - aabb.min.y);
+			Float tMax = farT - nearT;
+
+			cout << "enter=" << enter.toString() << endl;
+
 			if (entry.level > 0) {
 				/* Inner node -- push child nodes in 2D DDA order */
-				const Vector2 &subBlockSize = m_blockSizeF[entry.level+1];
-				Point2 p(ray.o.x + ray.d.x * nearT,
-				         ray.o.y + ray.d.y * nearT);
+				const Vector2i &numChildren = m_numChildren[entry.level];
+				const Vector2 &subBlockSize = m_blockSizeF[--entry.level];
+				entry.x *= numChildren.x; entry.y *= numChildren.y;
 
-				int nextLevel = entry.level + 1;
-				int x = p.x - aabb.min.x >= subBlockSize.x ? 1 : 0;
-				int y = p.y - aabb.min.y >= subBlockSize.y ? 1 : 0;
-
-				floorToInt(p.x / subBlockSize.x); /// Uh oh, this can't work
-				int y = floorToInt(p.y / subBlockSize.y);
+				int x = (enter.x >= subBlockSize.x) ? numChildren.x-1 : 0; /// precompute, then use ceil?
+				int y = (enter.y >= subBlockSize.y) ? numChildren.y-1 : 0;
 
 				Float tDeltaX = tDeltaXSingle * subBlockSize.x,
 				      tDeltaY = tDeltaYSingle * subBlockSize.y,
 					  tNextX  = nextMultiple(ray.d.x, p.x, subBlockSize.x),
-					  tNextY  = nextMultiple(ray.d.y, p.y, subBlockSize.y);
+					  tNextY  = nextMultiple(ray.d.y, p.y, subBlockSize.y),
+					  t       = 0;
+
+				cout << "x,y=" << x << "," << y << endl;
+				cout << "tDelta=" << tDeltaX << ", " << tDeltaY << endl;
+				cout << "tNext=" << tNextX << ", " << tNextY << endl;
+
+				while ((uint32_t) x < (uint32_t) numChildren.x &&
+				       (uint32_t) y < (uint32_t) numChildren.y && t < tMax) {
+					stack[++stackIdx].level = entry.level;
+					stack[stackIdx].x = entry.x + x;
+					stack[stackIdx].y = entry.y + y;
+					cout << "Adding node" << stack[stackIdx].toString() << endl;
 
-				Float t = 0;
-				while (t < maxt) {
 					if (tNextX < tNextY) {
+						t = tNextX;
 						tNextX += tDeltaX;
 						x += iDeltaX;
 					} else {
+						t = tNextY;
 						tNextY += tDeltaY;
 						y += iDeltaY;
 					}
-
-					
-
-					stack[stackIdx+1].level = entry.level + 1;
-					stack[stackIdx+1].x = x;
-					stack[stackIdx+1].y = y;
-					++stackIdx;
 				}
 			} else {
-				cout << "X(" << entry.x + 1 << ", " << entry.y+1 << ") = 1;" << endl;
-				--stackIdx;
+				Float
+					f00 = m_data[entry.y * m_dataSize.x + entry.x],
+					f01 = m_data[(entry.y + 1) * m_dataSize.x + entry.x],
+					f10 = m_data[entry.y * m_dataSize.x + entry.x + 1],
+					f11 = m_data[(entry.y + 1) * m_dataSize.x + entry.x + 1];
+
+				Float A = ray.d.x * ray.d.y * (f00 - f01 - f10 + f11);
+				Float B = ray.d.y * (f01 - f00 + enter.x * (f00 - f01 - f10 + f11))
+				        + ray.d.x * (f10 - f00 + enter.y * (f00 - f01 - f10 + f11))
+						- ray.d.z;
+				Float C = (enter.x - 1) * (enter.y - 1) * f00
+				        + enter.y * f01 + enter.x * (f10 - enter.y * (f01 + f10 - f11))
+				        - ray.o.y - ray.d.y * nearT;
+
+				cout << "Leaf node. Quadratic polynomial: " << A << ", " << B << ", " << C << endl;
+
+				Float t0, t1;
+				if (!solveQuadratic(A, B, C, t0, t1)) {
+					cout << "No solution!" << endl;
+					continue;
+				}
+				cout << "Solved for t0=" << t0 << ", t1=" << t1 << "." << endl;
+				if (t0 < 0 || t1 > tMax) {
+					cout << "Solution is out of bounds!" << endl;
+					continue;
+				}
+				t = (t0 >= 0) ? t0 : t1;
+				cout << "Decided on t=" << t << endl;
+
+				return true;
 			}
 		}
 
@@ -199,12 +246,6 @@ public:
 	}
 
 #if 0
-	struct PatchRecord {
-		Point2 uv;
-		Point p;
-		int x, y;
-	};
-
 	void fillIntersectionRecord(const Ray &ray,
 			const void *tmp, Intersection &its) const {
 		PatchRecord &temp = *((PatchRecord *) tmp);
@@ -272,11 +313,13 @@ public:
 			m_levelCount = (int) std::max(log2i((uint32_t) m_dataSize.x-1), log2i((uint32_t) m_dataSize.y-1)) + 1;
 
 			m_levelSize = new Vector2i[m_levelCount];
+			m_numChildren = new Vector2i[m_levelCount];
 			m_blockSizeF = new Vector2[m_levelCount];
 			m_minmax = new Interval*[m_levelCount];
 
 			m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
 			m_blockSizeF[0] = Vector2(1, 1);
+			m_surfaceArea = 0;
 			size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
 			m_minmax[0] = (Interval *) allocAligned(size);
 			storageSize += size;
@@ -292,6 +335,10 @@ public:
 					Float vmin = std::min(std::min(v00, v01), std::min(v10, v11));
 					Float vmax = std::max(std::max(v00, v01), std::max(v10, v11));
 					*bounds++ = Interval(vmin, vmax);
+
+					/* Estimate the total surface area (this is approximate) */
+					Float diff0 = v01-v10, diff1 = v00-v11;
+					m_surfaceArea += std::sqrt(1.0f + .5f * (diff0*diff0 + diff1*diff1));
 				}
 			}
 
@@ -303,6 +350,9 @@ public:
 				/* Calculate size of this layer */
 				cur.x = prev.x > 1 ? (prev.x / 2) : 1;
 				cur.y = prev.y > 1 ? (prev.y / 2) : 1;
+
+				m_numChildren[level].x = prev.x > 1 ? 2 : 1;
+				m_numChildren[level].y = prev.y > 1 ? 2 : 1;
 				m_blockSizeF[level] = Vector2(
 					m_levelSize[0].x / cur.x,
 					m_levelSize[0].y / cur.y
@@ -331,6 +381,7 @@ public:
 					}
 				}
 			}
+
 			Log(EInfo, "Done (took %i ms, uses %s of memory)", timer->getMilliseconds(),
 					memString(storageSize).c_str());
 
@@ -359,10 +410,12 @@ private:
 	/* Height field data */
 	Float *m_data;
 	Vector2i m_dataSize;
+	Float m_surfaceArea;
 
 	/* Min-max quadtree data */
 	int m_levelCount;
 	Vector2i *m_levelSize;
+	Vector2i *m_numChildren;
 	Vector2 *m_blockSizeF;
 	Interval **m_minmax;
 };

From 0ccb7211df365bf09924302eced6a0d761fdefb9 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 10 Sep 2013 18:43:10 +0200
Subject: [PATCH 15/43] integer version of 'signum'

---
 include/mitsuba/core/util.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/mitsuba/core/util.h b/include/mitsuba/core/util.h
index 4a71618e..147eb51e 100644
--- a/include/mitsuba/core/util.h
+++ b/include/mitsuba/core/util.h
@@ -291,7 +291,18 @@ inline Float signum(Float value) {
 		return -1;
 	else if (value > 0)
 		return 1;
-	else return 0;
+	else
+		return 0;
+}
+
+/// Compute the signum (a.k.a. "sign") function, and return an integer value
+inline int signumToInt(Float value) {
+	if (value < 0)
+		return -1;
+	else if (value > 0)
+		return 1;
+	else
+		return 0;
 }
 
 /// Integer floor function

From db35564b01518dc896744aef0be66d17f89732fa Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 10 Sep 2013 18:43:44 +0200
Subject: [PATCH 16/43] extended TAABB::rayIntersect function

---
 include/mitsuba/core/aabb.h | 70 +++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/include/mitsuba/core/aabb.h b/include/mitsuba/core/aabb.h
index 04fd075b..29b6c925 100644
--- a/include/mitsuba/core/aabb.h
+++ b/include/mitsuba/core/aabb.h
@@ -292,6 +292,11 @@ template <typename T> struct TAABB {
 	/** \brief Calculate the near and far ray-AABB intersection
 	 * points (if they exist).
 	 *
+	 * The parameters \c nearT and \c farT are used to return the
+	 * ray distances to the intersections (including negative distances).
+	 * Any previously contained value is overwritten, even if there was
+	 * no intersection.
+	 *
 	 * \remark In the Python bindings, this function returns the
 	 * \c nearT and \c farT values as a tuple (or \c None, when no
 	 * intersection was found)
@@ -302,11 +307,10 @@ template <typename T> struct TAABB {
 
 		/* For each pair of AABB planes */
 		for (int i=0; i<PointType::dim; i++) {
-			const Float direction = ray.d[i];
 			const Float origin = ray.o[i];
 			const Float minVal = min[i], maxVal = max[i];
 
-			if (direction == 0) {
+			if (ray.d[i] == 0) {
 				/* The ray is parallel to the planes */
 				if (origin < minVal || origin > maxVal)
 					return false;
@@ -329,6 +333,68 @@ template <typename T> struct TAABB {
 		return true;
 	}
 
+	/** \brief Calculate the overlap between an axis-aligned bounding box
+	 * and a ray segment
+	 *
+	 * This function is an extended version of the simpler \ref rayIntersect command
+	 * provided above. The first change is that input values passed via
+	 * the \c nearT and \c farT parameters are considered to specify a query interval.
+	 *
+	 * This interval is intersected against the bounding box, returning the remaining
+	 * interval using the \c nearT and \c farT parameters. Furthermore, the
+	 * interval endpoints are also returned as 3D positions via the \c near and
+	 * \c far parameters. Special care is taken to reduce round-off errors.
+	 *
+	 * \remark Not currently exposed via the Python bindings
+	 */
+	FINLINE bool rayIntersect(const Ray &ray, Float &nearT, Float &farT, Point &near, Point &far) const {
+		int nearAxis = -1, farAxis = -1;
+
+		/* For each pair of AABB planes */
+		for (int i=0; i<PointType::dim; i++) {
+			const Float origin = ray.o[i];
+			const Float minVal = min[i], maxVal = max[i];
+
+			if (ray.d[i] == 0) {
+				/* The ray is parallel to the planes */
+				if (origin < minVal || origin > maxVal)
+					return false;
+			} else {
+				/* Calculate intersection distances */
+				Float t1 = (minVal - origin) * ray.dRcp[i];
+				Float t2 = (maxVal - origin) * ray.dRcp[i];
+
+				bool flip = t1 > t2;
+				if (flip)
+					std::swap(t1, t2);
+
+				if (t1 > nearT) {
+					nearT = t1;
+					nearAxis = flip ? (i + PointType::dim) : i;
+				}
+
+				if (t2 < farT) {
+					farT = t2;
+					farAxis = flip ? i : (i + PointType::dim);
+				}
+			}
+		}
+
+		if (!(nearT <= farT))
+			return false;
+
+		near = ray(nearT); far = ray(farT);
+
+		/* Avoid roundoff errors on the component where the intersection took place */
+		if (nearAxis >= 0)
+			near[nearAxis % PointType::dim] = ((Float *) this)[nearAxis];
+
+		if (farAxis >= 0)
+			far[farAxis % PointType::dim]   = ((Float *) this)[farAxis];
+
+		return true;
+	}
+
 	/// Serialize this bounding box to a binary data stream
 	inline void serialize(Stream *stream) const {
 		min.serialize(stream);

From a1f61825ad49c6185b0b67f5c0fe66db0a3aa219 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 10 Sep 2013 18:44:25 +0200
Subject: [PATCH 17/43] initial working height field implementation

---
 src/librender/scene.cpp    |   2 +-
 src/shapes/heightfield.cpp | 176 +++++++++++++++++--------------------
 2 files changed, 80 insertions(+), 98 deletions(-)

diff --git a/src/librender/scene.cpp b/src/librender/scene.cpp
index ac42ce4f..2f7f0e39 100644
--- a/src/librender/scene.cpp
+++ b/src/librender/scene.cpp
@@ -333,7 +333,7 @@ void Scene::initialize() {
 		}
 		if (primitiveCount != effPrimitiveCount) {
 			Log(EDebug, "Scene contains " SIZE_T_FMT " primitives. Due to "
-				"instancing, the effective number of primitives is "
+				"instancing or other kinds of procedural geometry, the effective number of primitives is "
 				SIZE_T_FMT ".", primitiveCount, effPrimitiveCount);
 		}
 
diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index de691b24..c3630566 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -23,20 +23,28 @@
 #include <mitsuba/core/bitmap.h>
 #include <mitsuba/core/timer.h>
 
-#define MTS_QTREE_MAXDEPTH 24 /* Up to 16M x 16M */
+#define MTS_QTREE_MAXDEPTH 50
 
 MTS_NAMESPACE_BEGIN
 
 namespace {
 	/// Find the smallest t >= 0 such that a*t + b is a multiple of c
 	inline Float nextMultiple(Float a, Float b, Float c) {
-		if (a == 0)
-			return std::numeric_limits<Float>::infinity();
-		else if (a > 0)
-			return (std::ceil(b/c)*c - b) / a;
-		else
-			return (std::floor(b/c)*c - b) / a;
+		Float tmp     = b/c,
+			  rounded = (a > 0 ? std::ceil(tmp) : std::floor(tmp)) * c,
+			  diff    = rounded - b;
+
+		if (diff == 0)
+			diff = signum(a) * c;
+
+		return diff / a;
 	}
+
+	/// Temporary storage for patch-ray intersections
+	struct PatchIntersectionRecord {
+		Point p;   //< Intersection in local coordinates
+		int x, y;
+	};
 };
 
 class Heightfield : public Shape {
@@ -62,7 +70,7 @@ public:
 			delete[] m_minmax;
 			delete[] m_levelSize;
 			delete[] m_numChildren;
-			delete[] m_blockSizeF;
+			delete[] m_blockSize;
 		}
     }
 
@@ -89,22 +97,18 @@ public:
 	size_t getEffectivePrimitiveCount() const {
 		return (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y;
 	}
+
 	struct StackEntry {
 		int level;
 		int x, y;
 
 		inline std::string toString() const {
 			std::ostringstream oss;
-			oss << "StackEntry[level=" << level << ", x=" << x << ", y=" << y << "]" << endl;
+			oss << "StackEntry[level=" << level << ", x=" << x << ", y=" << y << "]";
 			return oss.str();
 		}
 	};
 
-	struct PatchRecord {
-		Point2 uv;
-		Point p;
-		int x, y;
-	};
 
 	bool rayIntersect(const Ray &_ray, Float mint, Float maxt, Float &t, void *tmp) const {
 		StackEntry stack[MTS_QTREE_MAXDEPTH];
@@ -113,28 +117,19 @@ public:
 		Ray ray;
 		m_objectToWorld.inverse()(_ray, ray);
 
-		cout << "Tracing ray " << ray.toString() << maxt << endl;
-
-		/* Rescale the ray so that it has unit length in 2D */
-//		Float length2D = hypot2(ray.d.x * (maxt-mint), ray.d.y * (maxt-mint));
-//		ray.d /= length2D; ray.dRcp *= length2D;
-
 		/* Ray length to cross a single cell along the X or Y axis */
-		Float tDeltaXSingle  = std::abs(ray.dRcp.x),
-		      tDeltaYSingle  = std::abs(ray.dRcp.y);
+		Float tDeltaXSingle = std::abs(ray.dRcp.x),
+		      tDeltaYSingle = std::abs(ray.dRcp.y);
 
 		/* Cell coordinate increments for steps along the ray */
-		int iDeltaX = (int) signum(ray.d.x),
-			iDeltaY = (int) signum(ray.d.y);
+		int iDeltaX = signumToInt(ray.d.x),
+			iDeltaY = signumToInt(ray.d.y);
 
 		if (iDeltaX == 0 && iDeltaY == 0) {
-			/* TODO: special case for perpendicular rays. Also, provide int version of signum */
+			/* TODO: special case for perpendicular rays */
 			return false;
 		}
 
-		cout << "tDeltaSingle=" << tDeltaXSingle << ", " << tDeltaYSingle << endl;
-		cout << "iDeltaX=" << iDeltaX << ", " << iDeltaY << endl;
-
 		int stackIdx = 0;
 		stack[stackIdx].level = m_levelCount-1;
 		stack[stackIdx].x = 0;
@@ -143,69 +138,53 @@ public:
 		while (stackIdx >= 0) {
 			StackEntry entry         = stack[stackIdx--];
 			const Interval &interval = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
-			const Vector2 &blockSize = m_blockSizeF[entry.level];
-
-			cout << endl << "Processing stack entry: " << entry.toString() << endl;
+			const Vector2 &blockSize = m_blockSize[entry.level];
+			Ray localRay(Point(ray.o.x - entry.x*blockSize.x, ray.o.y - entry.y*blockSize.y, ray.o.z), ray.d, 0);
 
 			/* Intersect against the current min-max quadtree node */
 			AABB aabb(
-				Point3(entry.x       * blockSize.x, entry.y       * blockSize.y, interval.min),
-				Point3((entry.x + 1) * blockSize.x, (entry.y + 1) * blockSize.y, interval.max)
+				Point3(0, 0, interval.min),
+				Point3(blockSize.x, blockSize.y, interval.max)
 			);
 
-			Float nearT, farT;
-			bool match = aabb.rayIntersect(ray, nearT, farT);
-			if (!match) {
-				cout << "Miss (1)!" << endl;
-				continue;
-			}
+			Float nearT = mint, farT = maxt;
+			Point enterPt, exitPt;
 
-			nearT = std::max(nearT, mint); farT = std::min(farT, maxt);
-			if (nearT > farT) {
-				cout << "Miss (2), nearT=" << nearT << ", farT=" << farT << "!" << endl;
+			if (!aabb.rayIntersect(localRay, nearT, farT, enterPt, exitPt))
 				continue;
-			}
 
-			Point2 enter(ray.o.x + ray.d.x * nearT - aabb.min.x,
-				         ray.o.y + ray.d.y * nearT - aabb.min.y);
 			Float tMax = farT - nearT;
 
-			cout << "enter=" << enter.toString() << endl;
-
 			if (entry.level > 0) {
 				/* Inner node -- push child nodes in 2D DDA order */
 				const Vector2i &numChildren = m_numChildren[entry.level];
-				const Vector2 &subBlockSize = m_blockSizeF[--entry.level];
+				const Vector2 &subBlockSize = m_blockSize[--entry.level];
 				entry.x *= numChildren.x; entry.y *= numChildren.y;
 
-				int x = (enter.x >= subBlockSize.x) ? numChildren.x-1 : 0; /// precompute, then use ceil?
-				int y = (enter.y >= subBlockSize.y) ? numChildren.y-1 : 0;
+				int x = (exitPt.x >= subBlockSize.x) ? numChildren.x-1 : 0;
+				int y = (exitPt.y >= subBlockSize.y) ? numChildren.y-1 : 0;
 
 				Float tDeltaX = tDeltaXSingle * subBlockSize.x,
 				      tDeltaY = tDeltaYSingle * subBlockSize.y,
-					  tNextX  = nextMultiple(ray.d.x, p.x, subBlockSize.x),
-					  tNextY  = nextMultiple(ray.d.y, p.y, subBlockSize.y),
+					  tNextX  = nextMultiple(-ray.d.x, exitPt.x, subBlockSize.x),
+					  tNextY  = nextMultiple(-ray.d.y, exitPt.y, subBlockSize.y),
 					  t       = 0;
 
-				cout << "x,y=" << x << "," << y << endl;
-				cout << "tDelta=" << tDeltaX << ", " << tDeltaY << endl;
-				cout << "tNext=" << tNextX << ", " << tNextY << endl;
-
 				while ((uint32_t) x < (uint32_t) numChildren.x &&
-				       (uint32_t) y < (uint32_t) numChildren.y && t < tMax) {
+				       (uint32_t) y < (uint32_t) numChildren.y && t <= tMax) {
+					SAssert(stackIdx+1 < MTS_QTREE_MAXDEPTH);
 					stack[++stackIdx].level = entry.level;
 					stack[stackIdx].x = entry.x + x;
 					stack[stackIdx].y = entry.y + y;
-					cout << "Adding node" << stack[stackIdx].toString() << endl;
 
 					if (tNextX < tNextY) {
 						t = tNextX;
 						tNextX += tDeltaX;
-						x += iDeltaX;
+						x -= iDeltaX;
 					} else {
 						t = tNextY;
 						tNextY += tDeltaY;
-						y += iDeltaY;
+						y -= iDeltaY;
 					}
 				}
 			} else {
@@ -216,27 +195,32 @@ public:
 					f11 = m_data[(entry.y + 1) * m_dataSize.x + entry.x + 1];
 
 				Float A = ray.d.x * ray.d.y * (f00 - f01 - f10 + f11);
-				Float B = ray.d.y * (f01 - f00 + enter.x * (f00 - f01 - f10 + f11))
-				        + ray.d.x * (f10 - f00 + enter.y * (f00 - f01 - f10 + f11))
+				Float B = ray.d.y * (f01 - f00 + enterPt.x * (f00 - f01 - f10 + f11))
+				        + ray.d.x * (f10 - f00 + enterPt.y * (f00 - f01 - f10 + f11))
 						- ray.d.z;
-				Float C = (enter.x - 1) * (enter.y - 1) * f00
-				        + enter.y * f01 + enter.x * (f10 - enter.y * (f01 + f10 - f11))
-				        - ray.o.y - ray.d.y * nearT;
-
-				cout << "Leaf node. Quadratic polynomial: " << A << ", " << B << ", " << C << endl;
+				Float C = (enterPt.x - 1) * (enterPt.y - 1) * f00
+				        + enterPt.y * f01 + enterPt.x * (f10 - enterPt.y * (f01 + f10 - f11))
+				        - enterPt.z;
 
 				Float t0, t1;
-				if (!solveQuadratic(A, B, C, t0, t1)) {
-					cout << "No solution!" << endl;
+				if (!solveQuadratic(A, B, C, t0, t1))
 					continue;
-				}
-				cout << "Solved for t0=" << t0 << ", t1=" << t1 << "." << endl;
-				if (t0 < 0 || t1 > tMax) {
-					cout << "Solution is out of bounds!" << endl;
+
+				if (t0 >= -Epsilon && t0 <= tMax + Epsilon)
+					t = t0;
+				else if (t1 >= -Epsilon && t1 <= tMax + Epsilon)
+					t = t1;
+				else
 					continue;
+
+				if (tmp) {
+					PatchIntersectionRecord &temp = *((PatchIntersectionRecord *) tmp);
+					Point pLocal = enterPt + ray.d * t;
+					temp.x = entry.x;
+					temp.y = entry.y;
+					temp.p = pLocal;
+					t += nearT;
 				}
-				t = (t0 >= 0) ? t0 : t1;
-				cout << "Decided on t=" << t << endl;
 
 				return true;
 			}
@@ -245,26 +229,25 @@ public:
 		return false;
 	}
 
-#if 0
 	void fillIntersectionRecord(const Ray &ray,
 			const void *tmp, Intersection &its) const {
-		PatchRecord &temp = *((PatchRecord *) tmp);
+		PatchIntersectionRecord &temp = *((PatchIntersectionRecord *) tmp);
 
-		int x = temp.x, y = temp.y, width = m_levelSize[0].x;
+		int x = temp.x, y = temp.y, width = m_dataSize.x;
 		Float
-			f00 = m_data[x   + y     * width],
-			f01 = m_data[x   + (y+1) * width],
-			f10 = m_data[x+1 + y     * width],
-			f11 = m_data[x+1 + (y+1) * width];
+			f00 = m_data[y     * width + x],
+			f01 = m_data[(y+1) * width + x],
+			f10 = m_data[y     * width + x + 1],
+			f11 = m_data[(y+1) * width + x + 1];
 
-		its.uv = Point2((x+temp.uv.x) * m_invSize.x, (y+temp.uv.y) * m_invSize.y);
-		its.p  = temp.p;
+		its.p  = Point(temp.p.x + temp.x, temp.p.y + temp.y, temp.p.z);
 
-		its.dpdu = Vector(1, 0, (1.0f - its.uv.y) * (f10 - f00) + its.uv.y * (f11 - f01));
-		its.dpdv = Vector(0, 1, (1.0f - its.uv.x) * (f01 - f00) + its.uv.x * (f11 - f10));
-		its.geoFrame.n = cross(its.dpdu, its.dpdv);
+		its.uv = Point2(its.p.x / m_levelSize[0].x, its.p.y / m_levelSize[0].y);
+		its.dpdu = Vector(1, 0, (1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x;
+		its.dpdv = Vector(0, 1, (1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y;
 		its.geoFrame.s = normalize(its.dpdu);
-		its.geoFrame.t = normalize(its.dpdv);
+		its.geoFrame.t = normalize(its.dpdv - dot(its.dpdv, its.geoFrame.s) * its.geoFrame.s);
+		its.geoFrame.n = cross(its.geoFrame.s, its.geoFrame.t);
 
 		its.shFrame = its.geoFrame;
 		its.shape = this;
@@ -274,7 +257,6 @@ public:
 		its.instance = NULL;
 		its.time = ray.time;
 	}
-#endif
 
 	bool rayIntersect(const Ray &ray, Float mint, Float maxt) const {
 		Float t;
@@ -314,11 +296,11 @@ public:
 
 			m_levelSize = new Vector2i[m_levelCount];
 			m_numChildren = new Vector2i[m_levelCount];
-			m_blockSizeF = new Vector2[m_levelCount];
+			m_blockSize = new Vector2[m_levelCount];
 			m_minmax = new Interval*[m_levelCount];
 
 			m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
-			m_blockSizeF[0] = Vector2(1, 1);
+			m_blockSize[0] = Vector2(1, 1);
 			m_surfaceArea = 0;
 			size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
 			m_minmax[0] = (Interval *) allocAligned(size);
@@ -328,10 +310,10 @@ public:
 			Interval *bounds = m_minmax[0];
 			for (int y=0; y<m_levelSize[0].y; ++y) {
 				for (int x=0; x<m_levelSize[0].x; ++x) {
-					Float v00 = m_data[y * m_levelSize[0].x + x];
-					Float v01 = m_data[y * m_levelSize[0].x + x + 1];
-					Float v10 = m_data[(y + 1) * m_levelSize[0].x + x];
-					Float v11 = m_data[(y + 1) * m_levelSize[0].x + x + 1];
+					Float v00 = m_data[y * m_dataSize.x + x];
+					Float v01 = m_data[y * m_dataSize.x + x + 1];
+					Float v10 = m_data[(y + 1) * m_dataSize.x + x];
+					Float v11 = m_data[(y + 1) * m_dataSize.x + x + 1];
 					Float vmin = std::min(std::min(v00, v01), std::min(v10, v11));
 					Float vmax = std::max(std::max(v00, v01), std::max(v10, v11));
 					*bounds++ = Interval(vmin, vmax);
@@ -353,7 +335,7 @@ public:
 
 				m_numChildren[level].x = prev.x > 1 ? 2 : 1;
 				m_numChildren[level].y = prev.y > 1 ? 2 : 1;
-				m_blockSizeF[level] = Vector2(
+				m_blockSize[level] = Vector2(
 					m_levelSize[0].x / cur.x,
 					m_levelSize[0].y / cur.y
 				);
@@ -416,7 +398,7 @@ private:
 	int m_levelCount;
 	Vector2i *m_levelSize;
 	Vector2i *m_numChildren;
-	Vector2 *m_blockSizeF;
+	Vector2 *m_blockSize;
 	Interval **m_minmax;
 };
 

From 6d382475514df6969bb5f6c564734fc1e4f87b1f Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 10 Sep 2013 20:33:24 +0200
Subject: [PATCH 18/43] shading normal support

---
 src/shapes/heightfield.cpp | 111 ++++++++++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 31 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index c3630566..d159f5be 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -42,24 +42,30 @@ namespace {
 
 	/// Temporary storage for patch-ray intersections
 	struct PatchIntersectionRecord {
-		Point p;   //< Intersection in local coordinates
+		Point p;
 		int x, y;
 	};
+
+	/// Stack entry for recursive quadtree traversal
+	struct StackEntry {
+		int level, x, y;
+	};
 };
 
 class Heightfield : public Shape {
 public:
-	Heightfield(const Properties &props) : Shape(props), m_data(NULL) {
+	Heightfield(const Properties &props) : Shape(props), m_data(NULL), m_normals(NULL) {
 		m_sizeHint = Vector2i(
 			props.getInteger("width", -1),
 			props.getInteger("height", -1)
 		);
 
 		m_objectToWorld = props.getTransform("toWorld", Transform());
+		m_shadingNormals = props.getBoolean("shadingNormals", true);
 	}
 
 	Heightfield(Stream *stream, InstanceManager *manager)
-		: Shape(stream, manager), m_data(NULL) {
+		: Shape(stream, manager), m_data(NULL), m_normals(NULL) {
 	}
 
 	~Heightfield() {
@@ -72,6 +78,8 @@ public:
 			delete[] m_numChildren;
 			delete[] m_blockSize;
 		}
+		if (m_normals)
+			freeAligned(m_normals);
     }
 
 	void serialize(Stream *stream, InstanceManager *manager) const {
@@ -98,18 +106,6 @@ public:
 		return (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y;
 	}
 
-	struct StackEntry {
-		int level;
-		int x, y;
-
-		inline std::string toString() const {
-			std::ostringstream oss;
-			oss << "StackEntry[level=" << level << ", x=" << x << ", y=" << y << "]";
-			return oss.str();
-		}
-	};
-
-
 	bool rayIntersect(const Ray &_ray, Float mint, Float maxt, Float &t, void *tmp) const {
 		StackEntry stack[MTS_QTREE_MAXDEPTH];
 
@@ -172,7 +168,6 @@ public:
 
 				while ((uint32_t) x < (uint32_t) numChildren.x &&
 				       (uint32_t) y < (uint32_t) numChildren.y && t <= tMax) {
-					SAssert(stackIdx+1 < MTS_QTREE_MAXDEPTH);
 					stack[++stackIdx].level = entry.level;
 					stack[stackIdx].x = entry.x + x;
 					stack[stackIdx].y = entry.y + y;
@@ -245,11 +240,27 @@ public:
 		its.uv = Point2(its.p.x / m_levelSize[0].x, its.p.y / m_levelSize[0].y);
 		its.dpdu = Vector(1, 0, (1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x;
 		its.dpdv = Vector(0, 1, (1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y;
+
 		its.geoFrame.s = normalize(its.dpdu);
 		its.geoFrame.t = normalize(its.dpdv - dot(its.dpdv, its.geoFrame.s) * its.geoFrame.s);
 		its.geoFrame.n = cross(its.geoFrame.s, its.geoFrame.t);
 
-		its.shFrame = its.geoFrame;
+		if (m_shadingNormals) {
+			Normal
+				n00 = m_normals[y     * width + x],
+				n01 = m_normals[(y+1) * width + x],
+				n10 = m_normals[y     * width + x + 1],
+				n11 = m_normals[(y+1) * width + x + 1];
+
+			its.shFrame.n = normalize(
+				(1 - temp.p.x) * ((1-temp.p.y) * n00 + temp.p.y * n01)
+				   + temp.p.x  * ((1-temp.p.y) * n10 + temp.p.y * n11));
+
+			its.shFrame.s = normalize(its.geoFrame.s - dot(its.geoFrame.s, its.shFrame.n) * its.shFrame.n);
+			its.shFrame.t = cross(its.shFrame.n, its.shFrame.s);
+		} else {
+			its.shFrame = its.geoFrame;
+		}
 		its.shape = this;
 
  		its.wi = its.toLocal(-ray.d);
@@ -266,6 +277,9 @@ public:
 	void addChild(const std::string &name, ConfigurableObject *child) {
 		const Class *cClass = child->getClass();
 		if (cClass->derivesFrom(Texture::m_theClass)) {
+			if (m_data != NULL)
+				Log(EError, "Attempted to attach multiple textures to a height field shape!");
+
 			ref<Bitmap> bitmap = static_cast<Texture *>(child)->getBitmap(m_sizeHint);
 
 			m_dataSize = bitmap->getSize();
@@ -287,6 +301,14 @@ public:
 			size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Float),
 			       storageSize = size;
 			m_data = (Float *) allocAligned(size);
+
+			if (m_shadingNormals) {
+				size *= 3;
+				m_normals = (Normal *) allocAligned(size);
+				memset(m_normals, 0, size);
+				storageSize += size;
+			}
+
 			bitmap->convert(m_data, Bitmap::ELuminance, Bitmap::EFloat);
 
 			Log(EInfo, "Building acceleration data structure for %ix%i height field ..", m_dataSize.x, m_dataSize.y);
@@ -310,16 +332,16 @@ public:
 			Interval *bounds = m_minmax[0];
 			for (int y=0; y<m_levelSize[0].y; ++y) {
 				for (int x=0; x<m_levelSize[0].x; ++x) {
-					Float v00 = m_data[y * m_dataSize.x + x];
-					Float v01 = m_data[y * m_dataSize.x + x + 1];
-					Float v10 = m_data[(y + 1) * m_dataSize.x + x];
-					Float v11 = m_data[(y + 1) * m_dataSize.x + x + 1];
-					Float vmin = std::min(std::min(v00, v01), std::min(v10, v11));
-					Float vmax = std::max(std::max(v00, v01), std::max(v10, v11));
-					*bounds++ = Interval(vmin, vmax);
+					Float f00 = m_data[y * m_dataSize.x + x];
+					Float f10 = m_data[y * m_dataSize.x + x + 1];
+					Float f01 = m_data[(y + 1) * m_dataSize.x + x];
+					Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
+					Float fmin = std::min(std::min(f00, f01), std::min(f10, f11));
+					Float fmax = std::max(std::max(f00, f01), std::max(f10, f11));
+					*bounds++ = Interval(fmin, fmax);
 
 					/* Estimate the total surface area (this is approximate) */
-					Float diff0 = v01-v10, diff1 = v00-v11;
+					Float diff0 = f01-f10, diff1 = f00-f11;
 					m_surfaceArea += std::sqrt(1.0f + .5f * (diff0*diff0 + diff1*diff1));
 				}
 			}
@@ -353,17 +375,42 @@ public:
 					for (int x=0; x<cur.x; ++x) {
 						int x0 = std::min(2*x,   prev.x-1),
 							x1 = std::min(2*x+1, prev.x-1);
-						const Interval &v00 = prevBounds[y0 * prev.x + x0], &v01 = prevBounds[y0 * prev.x + x1];
-						const Interval &v10 = prevBounds[y1 * prev.x + x0], &v11 = prevBounds[y1 * prev.x + x1];
-						Interval combined(v00);
-						combined.expandBy(v01);
-						combined.expandBy(v10);
-						combined.expandBy(v11);
+						const Interval &f00 = prevBounds[y0 * prev.x + x0], &f01 = prevBounds[y0 * prev.x + x1];
+						const Interval &f10 = prevBounds[y1 * prev.x + x0], &f11 = prevBounds[y1 * prev.x + x1];
+						Interval combined(f00);
+						combined.expandBy(f01);
+						combined.expandBy(f10);
+						combined.expandBy(f11);
 						*curBounds++ = combined;
 					}
 				}
 			}
 
+			if (m_shadingNormals) {
+				Log(EInfo, "Precomputing shading normals ..");
+
+				for (int y=0; y<m_levelSize[0].y; ++y) {
+					for (int x=0; x<m_levelSize[0].x; ++x) {
+						Float f00 = m_data[y * m_dataSize.x + x];
+						Float f10 = m_data[y * m_dataSize.x + x + 1];
+						Float f01 = m_data[(y + 1) * m_dataSize.x + x];
+						Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
+
+						m_normals[y       * m_dataSize.x + x]     += normalize(Normal(f00 - f10, f00 - f01, 1));
+						m_normals[y       * m_dataSize.x + x + 1] += normalize(Normal(f00 - f10, f10 - f11, 1));
+						m_normals[(y + 1) * m_dataSize.x + x]     += normalize(Normal(f01 - f11, f00 - f01, 1));
+						m_normals[(y + 1) * m_dataSize.x + x + 1] += normalize(Normal(f01 - f11, f10 - f11, 1));
+					}
+				}
+
+				for (int y=0; y<m_dataSize.y; ++y) {
+					for (int x=0; x<m_dataSize.x; ++x) {
+						Normal &normal = m_normals[x + y * m_dataSize.x];
+						normal /= normal.length();
+					}
+				}
+			}
+
 			Log(EInfo, "Done (took %i ms, uses %s of memory)", timer->getMilliseconds(),
 					memString(storageSize).c_str());
 
@@ -388,9 +435,11 @@ private:
 	Transform m_objectToWorld;
 	Vector2i m_sizeHint;
 	AABB m_dataAABB;
+	bool m_shadingNormals;
 
 	/* Height field data */
 	Float *m_data;
+	Normal *m_normals;
 	Vector2i m_dataSize;
 	Float m_surfaceArea;
 

From d9fdeee16ba3729b47ce3572e976ecbe9847da7d Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 11 Sep 2013 11:56:02 +0200
Subject: [PATCH 19/43] heightfield: serialization support, toString() method

---
 src/shapes/heightfield.cpp | 252 +++++++++++++++++++++----------------
 1 file changed, 146 insertions(+), 106 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index d159f5be..17814d8a 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -18,15 +18,22 @@
 
 #include <mitsuba/render/shape.h>
 #include <mitsuba/render/bsdf.h>
+#include <mitsuba/render/emitter.h>
+#include <mitsuba/render/medium.h>
+#include <mitsuba/render/sensor.h>
+#include <mitsuba/render/subsurface.h>
 #include <mitsuba/render/trimesh.h>
 #include <mitsuba/render/texture.h>
 #include <mitsuba/core/bitmap.h>
+#include <mitsuba/core/statistics.h>
 #include <mitsuba/core/timer.h>
 
 #define MTS_QTREE_MAXDEPTH 50
 
 MTS_NAMESPACE_BEGIN
 
+static StatsCounter numTraversals("Height field", "Traversal operations per query", EAverage);
+
 namespace {
 	/// Find the smallest t >= 0 such that a*t + b is a multiple of c
 	inline Float nextMultiple(Float a, Float b, Float c) {
@@ -54,7 +61,7 @@ namespace {
 
 class Heightfield : public Shape {
 public:
-	Heightfield(const Properties &props) : Shape(props), m_data(NULL), m_normals(NULL) {
+	Heightfield(const Properties &props) : Shape(props), m_data(NULL), m_normals(NULL), m_minmax(NULL) {
 		m_sizeHint = Vector2i(
 			props.getInteger("width", -1),
 			props.getInteger("height", -1)
@@ -62,15 +69,26 @@ public:
 
 		m_objectToWorld = props.getTransform("toWorld", Transform());
 		m_shadingNormals = props.getBoolean("shadingNormals", true);
+		m_flipNormals = props.getBoolean("flipNormals", false);
 	}
 
 	Heightfield(Stream *stream, InstanceManager *manager)
-		: Shape(stream, manager), m_data(NULL), m_normals(NULL) {
+		: Shape(stream, manager), m_data(NULL), m_normals(NULL), m_minmax(NULL) {
+
+		m_objectToWorld = Transform(stream);
+		m_shadingNormals = stream->readBool();
+		m_flipNormals = stream->readBool();
+		m_dataSize = Vector2i(stream);
+		size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y;
+		m_data = (Float *) allocAligned(size * sizeof(Float));
+		stream->readFloatArray(m_data, size);
+		buildInternal();
 	}
 
 	~Heightfield() {
-		if (m_data) {
+		if (m_data)
 			freeAligned(m_data);
+		if (m_minmax) {
 			for (int i=0; i<m_levelCount; ++i)
 				freeAligned(m_minmax[i]);
 			delete[] m_minmax;
@@ -84,6 +102,11 @@ public:
 
 	void serialize(Stream *stream, InstanceManager *manager) const {
 		Shape::serialize(stream, manager);
+		m_objectToWorld.serialize(stream);
+		stream->writeBool(m_shadingNormals);
+		stream->writeBool(m_flipNormals);
+		m_dataSize.serialize(stream);
+		stream->writeFloatArray(m_data, (size_t) m_dataSize.x * (size_t) m_dataSize.y);
 	}
 
 	AABB getAABB() const {
@@ -131,6 +154,9 @@ public:
 		stack[stackIdx].x = 0;
 		stack[stackIdx].y = 0;
 
+		numTraversals.incrementBase();
+
+		size_t nTraversals = 0;
 		while (stackIdx >= 0) {
 			StackEntry entry         = stack[stackIdx--];
 			const Interval &interval = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
@@ -146,6 +172,7 @@ public:
 			Float nearT = mint, farT = maxt;
 			Point enterPt, exitPt;
 
+			++nTraversals;
 			if (!aabb.rayIntersect(localRay, nearT, farT, enterPt, exitPt))
 				continue;
 
@@ -216,11 +243,13 @@ public:
 					temp.p = pLocal;
 					t += nearT;
 				}
+				numTraversals += nTraversals;
 
 				return true;
 			}
 		}
 
+		numTraversals += nTraversals;
 		return false;
 	}
 
@@ -298,134 +327,144 @@ public:
 					std::numeric_limits<Float>::infinity());
 			}
 
-			size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Float),
-			       storageSize = size;
+			size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Float);
 			m_data = (Float *) allocAligned(size);
-
-			if (m_shadingNormals) {
-				size *= 3;
-				m_normals = (Normal *) allocAligned(size);
-				memset(m_normals, 0, size);
-				storageSize += size;
-			}
-
 			bitmap->convert(m_data, Bitmap::ELuminance, Bitmap::EFloat);
 
-			Log(EInfo, "Building acceleration data structure for %ix%i height field ..", m_dataSize.x, m_dataSize.y);
+			buildInternal();
+		} else {
+			Shape::addChild(name, child);
+		}
+	}
 
-			ref<Timer> timer = new Timer();
-			m_levelCount = (int) std::max(log2i((uint32_t) m_dataSize.x-1), log2i((uint32_t) m_dataSize.y-1)) + 1;
+	void buildInternal() {
+		size_t storageSize = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Float);
+		Log(EInfo, "Building acceleration data structure for %ix%i height field ..", m_dataSize.x, m_dataSize.y);
 
-			m_levelSize = new Vector2i[m_levelCount];
-			m_numChildren = new Vector2i[m_levelCount];
-			m_blockSize = new Vector2[m_levelCount];
-			m_minmax = new Interval*[m_levelCount];
+		ref<Timer> timer = new Timer();
+		m_levelCount = (int) std::max(log2i((uint32_t) m_dataSize.x-1), log2i((uint32_t) m_dataSize.y-1)) + 1;
 
-			m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
-			m_blockSize[0] = Vector2(1, 1);
-			m_surfaceArea = 0;
-			size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
-			m_minmax[0] = (Interval *) allocAligned(size);
+		m_levelSize = new Vector2i[m_levelCount];
+		m_numChildren = new Vector2i[m_levelCount];
+		m_blockSize = new Vector2[m_levelCount];
+		m_minmax = new Interval*[m_levelCount];
+
+		m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
+		m_blockSize[0] = Vector2(1, 1);
+		m_surfaceArea = 0;
+		size_t size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
+		m_minmax[0] = (Interval *) allocAligned(size);
+		storageSize += size;
+
+		/* Build the lowest MIP layer directly from the heightfield data */
+		Interval *bounds = m_minmax[0];
+		for (int y=0; y<m_levelSize[0].y; ++y) {
+			for (int x=0; x<m_levelSize[0].x; ++x) {
+				Float f00 = m_data[y * m_dataSize.x + x];
+				Float f10 = m_data[y * m_dataSize.x + x + 1];
+				Float f01 = m_data[(y + 1) * m_dataSize.x + x];
+				Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
+				Float fmin = std::min(std::min(f00, f01), std::min(f10, f11));
+				Float fmax = std::max(std::max(f00, f01), std::max(f10, f11));
+				*bounds++ = Interval(fmin, fmax);
+
+				/* Estimate the total surface area (this is approximate) */
+				Float diff0 = f01-f10, diff1 = f00-f11;
+				m_surfaceArea += std::sqrt(1.0f + .5f * (diff0*diff0 + diff1*diff1));
+			}
+		}
+
+		/* Propagate height bounds upwards to the other layers */
+		for (int level=1; level<m_levelCount; ++level) {
+			Vector2i &cur  = m_levelSize[level],
+			         &prev = m_levelSize[level-1];
+
+			/* Calculate size of this layer */
+			cur.x = prev.x > 1 ? (prev.x / 2) : 1;
+			cur.y = prev.y > 1 ? (prev.y / 2) : 1;
+
+			m_numChildren[level].x = prev.x > 1 ? 2 : 1;
+			m_numChildren[level].y = prev.y > 1 ? 2 : 1;
+			m_blockSize[level] = Vector2(
+				m_levelSize[0].x / cur.x,
+				m_levelSize[0].y / cur.y
+			);
+
+			/* Allocate memory for interval data */
+			Interval *prevBounds = m_minmax[level-1], *curBounds;
+			size_t size = (size_t) cur.x * (size_t) cur.y * sizeof(Interval);
+			m_minmax[level] = curBounds = (Interval *) allocAligned(size);
+			storageSize += size;
+
+			/* Build by querying the previous layer */
+			for (int y=0; y<cur.y; ++y) {
+				int y0 = std::min(2*y,   prev.y-1),
+					y1 = std::min(2*y+1, prev.y-1);
+				for (int x=0; x<cur.x; ++x) {
+					int x0 = std::min(2*x,   prev.x-1),
+						x1 = std::min(2*x+1, prev.x-1);
+					const Interval &f00 = prevBounds[y0 * prev.x + x0], &f01 = prevBounds[y0 * prev.x + x1];
+					const Interval &f10 = prevBounds[y1 * prev.x + x0], &f11 = prevBounds[y1 * prev.x + x1];
+					Interval combined(f00);
+					combined.expandBy(f01);
+					combined.expandBy(f10);
+					combined.expandBy(f11);
+					*curBounds++ = combined;
+				}
+			}
+		}
+
+		if (m_shadingNormals) {
+			Log(EInfo, "Precomputing shading normals ..");
+			size_t size = (size_t) m_dataSize.x * (size_t) m_dataSize.y * sizeof(Normal);
+			m_normals = (Normal *) allocAligned(size);
+			memset(m_normals, 0, size);
 			storageSize += size;
 
-			/* Build the lowest MIP layer directly from the heightfield data */
-			Interval *bounds = m_minmax[0];
 			for (int y=0; y<m_levelSize[0].y; ++y) {
 				for (int x=0; x<m_levelSize[0].x; ++x) {
 					Float f00 = m_data[y * m_dataSize.x + x];
 					Float f10 = m_data[y * m_dataSize.x + x + 1];
 					Float f01 = m_data[(y + 1) * m_dataSize.x + x];
 					Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
-					Float fmin = std::min(std::min(f00, f01), std::min(f10, f11));
-					Float fmax = std::max(std::max(f00, f01), std::max(f10, f11));
-					*bounds++ = Interval(fmin, fmax);
 
-					/* Estimate the total surface area (this is approximate) */
-					Float diff0 = f01-f10, diff1 = f00-f11;
-					m_surfaceArea += std::sqrt(1.0f + .5f * (diff0*diff0 + diff1*diff1));
+					m_normals[y       * m_dataSize.x + x]     += normalize(Normal(f00 - f10, f00 - f01, 1));
+					m_normals[y       * m_dataSize.x + x + 1] += normalize(Normal(f00 - f10, f10 - f11, 1));
+					m_normals[(y + 1) * m_dataSize.x + x]     += normalize(Normal(f01 - f11, f00 - f01, 1));
+					m_normals[(y + 1) * m_dataSize.x + x + 1] += normalize(Normal(f01 - f11, f10 - f11, 1));
 				}
 			}
 
-			/* Propagate height bounds upwards to the other layers */
-			for (int level=1; level<m_levelCount; ++level) {
-				Vector2i &cur  = m_levelSize[level],
-				         &prev = m_levelSize[level-1];
-
-				/* Calculate size of this layer */
-				cur.x = prev.x > 1 ? (prev.x / 2) : 1;
-				cur.y = prev.y > 1 ? (prev.y / 2) : 1;
-
-				m_numChildren[level].x = prev.x > 1 ? 2 : 1;
-				m_numChildren[level].y = prev.y > 1 ? 2 : 1;
-				m_blockSize[level] = Vector2(
-					m_levelSize[0].x / cur.x,
-					m_levelSize[0].y / cur.y
-				);
-
-				/* Allocate memory for interval data */
-				Interval *prevBounds = m_minmax[level-1], *curBounds;
-				size_t size = (size_t) cur.x * (size_t) cur.y * sizeof(Interval);
-				m_minmax[level] = curBounds = (Interval *) allocAligned(size);
-				storageSize += size;
-
-				/* Build by querying the previous layer */
-				for (int y=0; y<cur.y; ++y) {
-					int y0 = std::min(2*y,   prev.y-1),
-						y1 = std::min(2*y+1, prev.y-1);
-					for (int x=0; x<cur.x; ++x) {
-						int x0 = std::min(2*x,   prev.x-1),
-							x1 = std::min(2*x+1, prev.x-1);
-						const Interval &f00 = prevBounds[y0 * prev.x + x0], &f01 = prevBounds[y0 * prev.x + x1];
-						const Interval &f10 = prevBounds[y1 * prev.x + x0], &f11 = prevBounds[y1 * prev.x + x1];
-						Interval combined(f00);
-						combined.expandBy(f01);
-						combined.expandBy(f10);
-						combined.expandBy(f11);
-						*curBounds++ = combined;
-					}
+			for (int y=0; y<m_dataSize.y; ++y) {
+				for (int x=0; x<m_dataSize.x; ++x) {
+					Normal &normal = m_normals[x + y * m_dataSize.x];
+					normal /= normal.length();
 				}
 			}
-
-			if (m_shadingNormals) {
-				Log(EInfo, "Precomputing shading normals ..");
-
-				for (int y=0; y<m_levelSize[0].y; ++y) {
-					for (int x=0; x<m_levelSize[0].x; ++x) {
-						Float f00 = m_data[y * m_dataSize.x + x];
-						Float f10 = m_data[y * m_dataSize.x + x + 1];
-						Float f01 = m_data[(y + 1) * m_dataSize.x + x];
-						Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
-
-						m_normals[y       * m_dataSize.x + x]     += normalize(Normal(f00 - f10, f00 - f01, 1));
-						m_normals[y       * m_dataSize.x + x + 1] += normalize(Normal(f00 - f10, f10 - f11, 1));
-						m_normals[(y + 1) * m_dataSize.x + x]     += normalize(Normal(f01 - f11, f00 - f01, 1));
-						m_normals[(y + 1) * m_dataSize.x + x + 1] += normalize(Normal(f01 - f11, f10 - f11, 1));
-					}
-				}
-
-				for (int y=0; y<m_dataSize.y; ++y) {
-					for (int x=0; x<m_dataSize.x; ++x) {
-						Normal &normal = m_normals[x + y * m_dataSize.x];
-						normal /= normal.length();
-					}
-				}
-			}
-
-			Log(EInfo, "Done (took %i ms, uses %s of memory)", timer->getMilliseconds(),
-					memString(storageSize).c_str());
-
-			m_dataAABB = AABB(
-				Point3(0, 0, m_minmax[m_levelCount-1][0].min),
-				Point3(m_levelSize[0].x, m_levelSize[0].y, m_minmax[m_levelCount-1][0].max)
-			);
-		} else {
-			Shape::addChild(name, child);
 		}
+
+		Log(EInfo, "Done (took %i ms, uses %s of memory)", timer->getMilliseconds(),
+				memString(storageSize).c_str());
+
+		m_dataAABB = AABB(
+			Point3(0, 0, m_minmax[m_levelCount-1][0].min),
+			Point3(m_levelSize[0].x, m_levelSize[0].y, m_minmax[m_levelCount-1][0].max)
+		);
 	}
 
 	std::string toString() const {
 		std::ostringstream oss;
-		oss << "Heightfield[" << endl
+		oss << "HeightField[" << endl
+			<< "  size = " << m_dataSize.toString() << "," << endl
+			<< "  shadingNormals = " << m_shadingNormals << "," << endl
+			<< "  objectToWorld = " << indent(m_objectToWorld.toString()) << "," << endl
+			<< "  bsdf = " << indent(m_bsdf.toString()) << "," << endl;
+		if (isMediumTransition())
+			oss << "  interiorMedium = " << indent(m_interiorMedium.toString()) << "," << endl
+				<< "  exteriorMedium = " << indent(m_exteriorMedium.toString()) << "," << endl;
+		oss << "  emitter = " << indent(m_emitter.toString()) << "," << endl
+			<< "  sensor = " << indent(m_sensor.toString()) << "," << endl
+			<< "  subsurface = " << indent(m_subsurface.toString()) << endl
 			<< "]";
 		return oss.str();
 	}
@@ -436,6 +475,7 @@ private:
 	Vector2i m_sizeHint;
 	AABB m_dataAABB;
 	bool m_shadingNormals;
+	bool m_flipNormals;
 
 	/* Height field data */
 	Float *m_data;

From 3ed7a518a31c7032686ae83124ce8abfe7844882 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 11 Sep 2013 14:59:13 +0200
Subject: [PATCH 20/43] heightfield: support for transformations, flipNormals
 parameter

---
 src/shapes/heightfield.cpp | 39 +++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 17814d8a..edc07349 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -144,11 +144,6 @@ public:
 		int iDeltaX = signumToInt(ray.d.x),
 			iDeltaY = signumToInt(ray.d.y);
 
-		if (iDeltaX == 0 && iDeltaY == 0) {
-			/* TODO: special case for perpendicular rays */
-			return false;
-		}
-
 		int stackIdx = 0;
 		stack[stackIdx].level = m_levelCount-1;
 		stack[stackIdx].x = 0;
@@ -228,9 +223,12 @@ public:
 				if (!solveQuadratic(A, B, C, t0, t1))
 					continue;
 
-				if (t0 >= -Epsilon && t0 <= tMax + Epsilon)
+				Float min = std::max(-Epsilon, mint - nearT);
+				Float max = std::min(tMax + Epsilon, maxt - nearT);
+
+				if (t0 >= min && t0 <= max)
 					t = t0;
-				else if (t1 >= -Epsilon && t1 <= tMax + Epsilon)
+				else if (t1 >= min && t1 <= max)
 					t = t1;
 				else
 					continue;
@@ -244,7 +242,6 @@ public:
 					t += nearT;
 				}
 				numTraversals += nTraversals;
-
 				return true;
 			}
 		}
@@ -264,11 +261,12 @@ public:
 			f10 = m_data[y     * width + x + 1],
 			f11 = m_data[(y+1) * width + x + 1];
 
-		its.p  = Point(temp.p.x + temp.x, temp.p.y + temp.y, temp.p.z);
+		Point pLocal(temp.p.x + temp.x, temp.p.y + temp.y, temp.p.z);
 
-		its.uv = Point2(its.p.x / m_levelSize[0].x, its.p.y / m_levelSize[0].y);
-		its.dpdu = Vector(1, 0, (1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x;
-		its.dpdv = Vector(0, 1, (1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y;
+		its.p  = m_objectToWorld(pLocal);
+		its.uv = Point2(pLocal.x / m_levelSize[0].x, pLocal.y / m_levelSize[0].y);
+		its.dpdu = m_objectToWorld(Vector(1, 0, (1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x);
+		its.dpdv = m_objectToWorld(Vector(0, 1, (1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y);
 
 		its.geoFrame.s = normalize(its.dpdu);
 		its.geoFrame.t = normalize(its.dpdv - dot(its.dpdv, its.geoFrame.s) * its.geoFrame.s);
@@ -281,17 +279,22 @@ public:
 				n10 = m_normals[y     * width + x + 1],
 				n11 = m_normals[(y+1) * width + x + 1];
 
-			its.shFrame.n = normalize(
+			its.shFrame.n = normalize(m_objectToWorld(Normal(
 				(1 - temp.p.x) * ((1-temp.p.y) * n00 + temp.p.y * n01)
-				   + temp.p.x  * ((1-temp.p.y) * n10 + temp.p.y * n11));
+				   + temp.p.x  * ((1-temp.p.y) * n10 + temp.p.y * n11))));
 
 			its.shFrame.s = normalize(its.geoFrame.s - dot(its.geoFrame.s, its.shFrame.n) * its.shFrame.n);
 			its.shFrame.t = cross(its.shFrame.n, its.shFrame.s);
 		} else {
 			its.shFrame = its.geoFrame;
 		}
-		its.shape = this;
 
+		if (m_flipNormals) {
+			its.shFrame.n *= -1;
+			its.geoFrame.n *= -1;
+		}
+
+		its.shape = this;
  		its.wi = its.toLocal(-ray.d);
  		its.hasUVPartials = false;
 		its.instance = NULL;
@@ -331,6 +334,10 @@ public:
 			m_data = (Float *) allocAligned(size);
 			bitmap->convert(m_data, Bitmap::ELuminance, Bitmap::EFloat);
 
+			m_objectToWorld = m_objectToWorld * Transform::translate(Vector(-1, -1, 0)) * Transform::scale(Vector(
+				(Float) 2 / (m_dataSize.x-1),
+				(Float) 2 / (m_dataSize.y-1), 1));
+
 			buildInternal();
 		} else {
 			Shape::addChild(name, child);
@@ -457,7 +464,9 @@ public:
 		oss << "HeightField[" << endl
 			<< "  size = " << m_dataSize.toString() << "," << endl
 			<< "  shadingNormals = " << m_shadingNormals << "," << endl
+			<< "  flipNormals = " << m_flipNormals << "," << endl
 			<< "  objectToWorld = " << indent(m_objectToWorld.toString()) << "," << endl
+			<< "  aabb = " << indent(getAABB().toString()) << "," << endl
 			<< "  bsdf = " << indent(m_bsdf.toString()) << "," << endl;
 		if (isMediumTransition())
 			oss << "  interiorMedium = " << indent(m_interiorMedium.toString()) << "," << endl

From a67da0ef9d5ca261ac6566264db35630fae0136a Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 11 Sep 2013 17:45:02 +0200
Subject: [PATCH 21/43] heightfield: fast start

---
 src/shapes/heightfield.cpp | 109 +++++++++++++++++++++++++++----------
 1 file changed, 80 insertions(+), 29 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index edc07349..1609b77f 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -28,7 +28,8 @@
 #include <mitsuba/core/statistics.h>
 #include <mitsuba/core/timer.h>
 
-#define MTS_QTREE_MAXDEPTH 50
+#define MTS_QTREE_MAXDEPTH  50
+#define MTS_QTREE_FASTSTART 1
 
 MTS_NAMESPACE_BEGIN
 
@@ -38,11 +39,11 @@ namespace {
 	/// Find the smallest t >= 0 such that a*t + b is a multiple of c
 	inline Float nextMultiple(Float a, Float b, Float c) {
 		Float tmp     = b/c,
-			  rounded = (a > 0 ? std::ceil(tmp) : std::floor(tmp)) * c,
-			  diff    = rounded - b;
+		      rounded = (a > 0 ? std::ceil(tmp) : std::floor(tmp)) * c,
+		      diff    = rounded - b;
 
 		if (diff == 0)
-			diff = signum(a) * c;
+			diff = math::signum(a) * c;
 
 		return diff / a;
 	}
@@ -95,6 +96,7 @@ public:
 			delete[] m_levelSize;
 			delete[] m_numChildren;
 			delete[] m_blockSize;
+			delete[] m_blockSizeF;
 		}
 		if (m_normals)
 			freeAligned(m_normals);
@@ -145,38 +147,78 @@ public:
 			iDeltaY = signumToInt(ray.d.y);
 
 		int stackIdx = 0;
-		stack[stackIdx].level = m_levelCount-1;
-		stack[stackIdx].x = 0;
-		stack[stackIdx].y = 0;
 
-		numTraversals.incrementBase();
+		#if MTS_QTREE_FASTSTART
+			/* If the entire ray is restricted to a subtree of the quadtree,
+			   directly start the traversal from the there instead of the root
+			   node. This can save some unnecessary work. */
+			{
+				Point enterPt, exitPt;
+				Float nearT = mint, farT = maxt;
+				if (!m_dataAABB.rayIntersect(ray, nearT, farT, enterPt, exitPt))
+					return false;
 
-		size_t nTraversals = 0;
+				/* Determine minima and maxima in integer coordinates (round down!) */
+				int minX = (int) std::min(enterPt.x, exitPt.x),
+				    maxX = (int) std::max(enterPt.x, exitPt.x),
+				    minY = (int) std::min(enterPt.y, exitPt.y),
+				    maxY = (int) std::max(enterPt.y, exitPt.y);
+
+				/* Determine quadtree level */
+				int level = clamp(1 + log2i(
+					std::max((uint32_t) (minX ^ maxX), (uint32_t) (minY ^ maxY))),
+					0, m_levelCount-1);
+
+				/* Compute X and Y coordinates at that level */
+				const Vector2i &blockSize = m_blockSize[level];
+				int x = clamp(minX / blockSize.x, 0, m_levelSize[level].x-1),
+				    y = clamp(minY / blockSize.y, 0, m_levelSize[level].y-1);
+
+				stack[stackIdx].level = level;
+				stack[stackIdx].x = x;
+				stack[stackIdx].y = y;
+			}
+		#else
+			/* Start traversal from the root node of the quadtree */
+			stack[stackIdx].level = m_levelCount-1;
+			stack[stackIdx].x = 0;
+			stack[stackIdx].y = 0;
+		#endif
+
+
+		//numTraversals.incrementBase();
+
+		//size_t nTraversals = 0;
 		while (stackIdx >= 0) {
-			StackEntry entry         = stack[stackIdx--];
-			const Interval &interval = m_minmax[entry.level][entry.x + entry.y * m_levelSize[entry.level].x];
-			const Vector2 &blockSize = m_blockSize[entry.level];
-			Ray localRay(Point(ray.o.x - entry.x*blockSize.x, ray.o.y - entry.y*blockSize.y, ray.o.z), ray.d, 0);
+			//++nTraversals;
 
-			/* Intersect against the current min-max quadtree node */
+			/* Pop a node from the stack and compute its bounding box */
+			StackEntry entry         = stack[stackIdx--];
+			const Interval &interval = m_minmax[entry.level][
+				entry.x + entry.y * m_levelSize[entry.level].x];
+			const Vector2 &blockSize = m_blockSizeF[entry.level];
 			AABB aabb(
 				Point3(0, 0, interval.min),
 				Point3(blockSize.x, blockSize.y, interval.max)
 			);
 
+			/* Intersect the ray against the bounding box, in local coordinates */
+			Ray localRay(Point(ray.o.x - entry.x*blockSize.x,
+			                   ray.o.y - entry.y*blockSize.y, ray.o.z), ray.d, 0);
 			Float nearT = mint, farT = maxt;
 			Point enterPt, exitPt;
 
-			++nTraversals;
-			if (!aabb.rayIntersect(localRay, nearT, farT, enterPt, exitPt))
+			if (!aabb.rayIntersect(localRay, nearT, farT, enterPt, exitPt)) {
+				/* The bounding box was not intersected -- skip */
 				continue;
+			}
 
 			Float tMax = farT - nearT;
 
 			if (entry.level > 0) {
 				/* Inner node -- push child nodes in 2D DDA order */
 				const Vector2i &numChildren = m_numChildren[entry.level];
-				const Vector2 &subBlockSize = m_blockSize[--entry.level];
+				const Vector2 &subBlockSize = m_blockSizeF[--entry.level];
 				entry.x *= numChildren.x; entry.y *= numChildren.y;
 
 				int x = (exitPt.x >= subBlockSize.x) ? numChildren.x-1 : 0;
@@ -184,9 +226,9 @@ public:
 
 				Float tDeltaX = tDeltaXSingle * subBlockSize.x,
 				      tDeltaY = tDeltaYSingle * subBlockSize.y,
-					  tNextX  = nextMultiple(-ray.d.x, exitPt.x, subBlockSize.x),
-					  tNextY  = nextMultiple(-ray.d.y, exitPt.y, subBlockSize.y),
-					  t       = 0;
+				      tNextX  = nextMultiple(-ray.d.x, exitPt.x, subBlockSize.x),
+				      tNextY  = nextMultiple(-ray.d.y, exitPt.y, subBlockSize.y),
+				      t       = 0;
 
 				while ((uint32_t) x < (uint32_t) numChildren.x &&
 				       (uint32_t) y < (uint32_t) numChildren.y && t <= tMax) {
@@ -205,6 +247,7 @@ public:
 					}
 				}
 			} else {
+				/* Intersect the ray against a bilinear patch */
 				Float
 					f00 = m_data[entry.y * m_dataSize.x + entry.x],
 					f01 = m_data[(entry.y + 1) * m_dataSize.x + entry.x],
@@ -241,12 +284,12 @@ public:
 					temp.p = pLocal;
 					t += nearT;
 				}
-				numTraversals += nTraversals;
+				//numTraversals += nTraversals;
 				return true;
 			}
 		}
 
-		numTraversals += nTraversals;
+		//numTraversals += nTraversals;
 		return false;
 	}
 
@@ -264,9 +307,11 @@ public:
 		Point pLocal(temp.p.x + temp.x, temp.p.y + temp.y, temp.p.z);
 
 		its.p  = m_objectToWorld(pLocal);
-		its.uv = Point2(pLocal.x / m_levelSize[0].x, pLocal.y / m_levelSize[0].y);
-		its.dpdu = m_objectToWorld(Vector(1, 0, (1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x);
-		its.dpdv = m_objectToWorld(Vector(0, 1, (1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y);
+		its.uv = Point2(pLocal.x * m_invSize.x, pLocal.y / m_invSize.y);
+		its.dpdu = m_objectToWorld(Vector(1, 0,
+			(1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x);
+		its.dpdv = m_objectToWorld(Vector(0, 1,
+			(1.0f - temp.p.x) * (f01 - f00) + temp.p.x * (f11 - f10)) * m_levelSize[0].y);
 
 		its.geoFrame.s = normalize(its.dpdu);
 		its.geoFrame.t = normalize(its.dpdv - dot(its.dpdv, its.geoFrame.s) * its.geoFrame.s);
@@ -353,11 +398,14 @@ public:
 
 		m_levelSize = new Vector2i[m_levelCount];
 		m_numChildren = new Vector2i[m_levelCount];
-		m_blockSize = new Vector2[m_levelCount];
+		m_blockSize = new Vector2i[m_levelCount];
+		m_blockSizeF = new Vector2[m_levelCount];
 		m_minmax = new Interval*[m_levelCount];
 
 		m_levelSize[0]  = Vector2i(m_dataSize.x - 1, m_dataSize.y - 1);
-		m_blockSize[0] = Vector2(1, 1);
+		m_blockSize[0] = Vector2i(1, 1);
+		m_blockSizeF[0] = Vector2(1, 1);
+		m_invSize = Vector2((Float) 1 / m_levelSize[0].x, (Float) 1 / m_levelSize[0].y);
 		m_surfaceArea = 0;
 		size_t size = (size_t) m_levelSize[0].x * (size_t) m_levelSize[0].y * sizeof(Interval);
 		m_minmax[0] = (Interval *) allocAligned(size);
@@ -392,10 +440,11 @@ public:
 
 			m_numChildren[level].x = prev.x > 1 ? 2 : 1;
 			m_numChildren[level].y = prev.y > 1 ? 2 : 1;
-			m_blockSize[level] = Vector2(
+			m_blockSize[level] = Vector2i(
 				m_levelSize[0].x / cur.x,
 				m_levelSize[0].y / cur.y
 			);
+			m_blockSizeF[level] = Vector2f(m_blockSize[level]);
 
 			/* Allocate memory for interval data */
 			Interval *prevBounds = m_minmax[level-1], *curBounds;
@@ -490,13 +539,15 @@ private:
 	Float *m_data;
 	Normal *m_normals;
 	Vector2i m_dataSize;
+	Vector2 m_invSize;
 	Float m_surfaceArea;
 
 	/* Min-max quadtree data */
 	int m_levelCount;
 	Vector2i *m_levelSize;
 	Vector2i *m_numChildren;
-	Vector2 *m_blockSize;
+	Vector2i *m_blockSize;
+	Vector2 *m_blockSizeF;
 	Interval **m_minmax;
 };
 

From 985620a2f4c2a8c9c82af8944376a3f82d5f1ee5 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 12 Sep 2013 14:18:17 +0200
Subject: [PATCH 22/43] reenable statistics

---
 src/shapes/heightfield.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 1609b77f..394be784 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -185,12 +185,11 @@ public:
 			stack[stackIdx].y = 0;
 		#endif
 
+		numTraversals.incrementBase();
 
-		//numTraversals.incrementBase();
-
-		//size_t nTraversals = 0;
+		size_t nTraversals = 0;
 		while (stackIdx >= 0) {
-			//++nTraversals;
+			++nTraversals;
 
 			/* Pop a node from the stack and compute its bounding box */
 			StackEntry entry         = stack[stackIdx--];
@@ -284,12 +283,12 @@ public:
 					temp.p = pLocal;
 					t += nearT;
 				}
-				//numTraversals += nTraversals;
+				numTraversals += nTraversals;
 				return true;
 			}
 		}
 
-		//numTraversals += nTraversals;
+		numTraversals += nTraversals;
 		return false;
 	}
 
@@ -366,7 +365,7 @@ public:
 			if (!isPowerOfTwo(m_dataSize.y - 1)) m_dataSize.y = (int) roundToPowerOfTwo((uint32_t) m_dataSize.y - 1) + 1;
 
 			if (bitmap->getSize() != m_dataSize) {
-				Log(EInfo, "Resampling heightfield texture from %ix%i to %ix%i..",
+				Log(EInfo, "Resampling heightfield texture from %ix%i to %ix%i ..",
 					bitmap->getWidth(), bitmap->getHeight(), m_dataSize.x, m_dataSize.y);
 
 				bitmap = bitmap->resample(NULL, ReconstructionFilter::EClamp,

From 263d67263784d3ebfb44426063aeaefc9d17fe5c Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 12 Sep 2013 14:18:42 +0200
Subject: [PATCH 23/43] introduced CPU affinity feature, better Thread::getID()
 function on Linux

---
 include/mitsuba/core/sched.h      |  2 +-
 include/mitsuba/core/statistics.h |  3 ++
 include/mitsuba/core/thread.h     | 13 +++++++
 src/libcore/sched.cpp             |  3 +-
 src/libcore/thread.cpp            | 56 +++++++++++++++++++++++++++++--
 src/libpython/core.cpp            |  6 ++--
 src/mitsuba/mitsuba.cpp           |  2 +-
 src/mitsuba/mtssrv.cpp            |  2 +-
 src/mitsuba/mtsutil.cpp           |  2 +-
 src/mtsgui/mainwindow.cpp         |  7 ++--
 10 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/include/mitsuba/core/sched.h b/include/mitsuba/core/sched.h
index 54e8d8dd..8cc03bb0 100644
--- a/include/mitsuba/core/sched.h
+++ b/include/mitsuba/core/sched.h
@@ -758,7 +758,7 @@ protected:
  */
 class MTS_EXPORT_CORE LocalWorker : public Worker {
 public:
-	LocalWorker(const std::string &name,
+	LocalWorker(int coreID, const std::string &name,
 		Thread::EThreadPriority priority = Thread::ENormalPriority);
 
 	MTS_DECLARE_CLASS()
diff --git a/include/mitsuba/core/statistics.h b/include/mitsuba/core/statistics.h
index 6856b271..a21a6ad5 100644
--- a/include/mitsuba/core/statistics.h
+++ b/include/mitsuba/core/statistics.h
@@ -23,6 +23,8 @@
 #include <mitsuba/core/timer.h>
 #include <mitsuba/core/atomic.h>
 
+//#define MTS_NO_STATISTICS 1
+
 #if defined(_MSC_VER)
 # include <intrin.h>
 #endif
@@ -110,6 +112,7 @@ public:
 	inline uint64_t operator++() {
 #if defined(MTS_NO_STATISTICS)
 		// do nothing
+		return 0;
 #elif defined(_MSC_VER) && defined(_WIN64)
 		const int offset = Thread::getID() & NUM_COUNTERS_MASK;
 		_InterlockedExchangeAdd64(reinterpret_cast<__int64 volatile *>(&m_value[offset].value), 1);
diff --git a/include/mitsuba/core/thread.h b/include/mitsuba/core/thread.h
index 5b099912..e08065ca 100644
--- a/include/mitsuba/core/thread.h
+++ b/include/mitsuba/core/thread.h
@@ -66,6 +66,19 @@ public:
 	/// Return the thread priority
 	EThreadPriority getPriority() const;
 
+	/**
+	 * \brief Set the core affinity
+	 *
+	 * This function provides a hint to the operating system
+	 * scheduler that the thread should preferably run
+	 * on the specified processor core. By default, the parameter
+	 * is set to -1, which means that there is no affinity.
+	 */
+	void setCoreAffinity(int core);
+
+	/// Return the core affinity
+	int getCoreAffinity() const;
+
 	/**
 	 * \brief Specify whether or not this thread is critical
 	 *
diff --git a/src/libcore/sched.cpp b/src/libcore/sched.cpp
index 21f9a734..c95f2218 100644
--- a/src/libcore/sched.cpp
+++ b/src/libcore/sched.cpp
@@ -631,8 +631,9 @@ void Worker::start(Scheduler *scheduler, int workerIndex, int coreOffset) {
 	Thread::start();
 }
 
-LocalWorker::LocalWorker(const std::string &name,
+LocalWorker::LocalWorker(int coreID, const std::string &name,
 		Thread::EThreadPriority priority) : Worker(name) {
+	setCoreAffinity(coreID);
 	m_coreCount = 1;
 #if !defined(__LINUX__)
 	/* Don't set thead priority on Linux, since it uses
diff --git a/src/libcore/thread.cpp b/src/libcore/thread.cpp
index 9fd9e7fb..e4b85135 100644
--- a/src/libcore/thread.cpp
+++ b/src/libcore/thread.cpp
@@ -27,6 +27,7 @@
 // Required for native thread functions
 #if defined(__LINUX__)
 # include <sys/prctl.h>
+# include <sys/syscall.h>
 #elif defined(__OSX__)
 # include <pthread.h>
 #elif defined(__WINDOWS__)
@@ -35,7 +36,6 @@
 
 MTS_NAMESPACE_BEGIN
 
-
 #if defined(_MSC_VER)
 namespace {
 // Helper function to set a native thread name. MSDN:
@@ -69,6 +69,10 @@ void SetThreadName(const char* threadName, DWORD dwThreadID = -1) {
 } // namespace
 #endif // _MSC_VER
 
+#if defined(__LINUX__)
+static pthread_key_t __thread_id;
+#endif
+
 /**
  * Internal Thread members
  */
@@ -80,13 +84,15 @@ struct Thread::ThreadPrivate {
 	std::string name;
 	bool running, joined;
 	Thread::EThreadPriority priority;
+	int coreAffinity;
 	static ThreadLocal<Thread> *self;
 	bool critical;
 	boost::thread thread;
 
 	ThreadPrivate(const std::string & name_) :
 		name(name_), running(false), joined(false),
-		priority(Thread::ENormalPriority), critical(false) { }
+		priority(Thread::ENormalPriority), coreAffinity(-1),
+	    critical(false) { }
 };
 
 /**
@@ -143,7 +149,10 @@ int Thread::getID() {
 #elif defined(__OSX__)
 	return static_cast<int>(pthread_mach_thread_np(pthread_self()));
 #else
-	return (int) pthread_self();
+	/* pthread_self() doesn't provide nice increasing IDs, and syscall(SYS_gettid)
+	   causes a context switch. Use a thread-local variable that caches the
+	   result of syscall(SYS_gettid) */
+	return static_cast<int>(reinterpret_cast<intptr_t>(pthread_getspecific(__thread_id)));
 #endif
 }
 
@@ -297,9 +306,45 @@ bool Thread::setPriority(EThreadPriority priority) {
 	return true;
 }
 
+void Thread::setCoreAffinity(int coreID) {
+	d->coreAffinity = coreID;
+	if (!d->running)
+		return;
+
+	int nCores = getCoreCount();
+#if defined(__LINUX__) || defined(__OSX__)
+	cpu_set_t *cpuset = CPU_ALLOC(nCores);
+	if (cpuset == NULL)
+		Log(EError, "Thread::setCoreAffinity: could not allocate cpu_set_t");
+
+	size_t size = CPU_ALLOC_SIZE(nCores);
+	CPU_ZERO_S(size, cpuset);
+	if (coreID != -1 && coreID < nCores) {
+		CPU_SET_S(coreID, size, cpuset);
+	} else {
+		for (int i=0; i<nCores; ++i)
+			CPU_SET_S(i, size, cpuset);
+	}
+
+	const pthread_t threadID = d->thread.native_handle();
+	if (pthread_setaffinity_np(threadID, size, cpuset) != 0)
+		Log(EWarn, "pthread_setaffinity_np: failed");
+	CPU_FREE(cpuset);
+#endif
+}
+
+int Thread::getCoreAffinity() const {
+	return d->coreAffinity;
+}
+
+
 void Thread::dispatch(Thread *thread) {
 	detail::initializeLocalTLS();
 
+#if defined(__LINUX__)
+	pthread_setspecific(__thread_id, reinterpret_cast<void *>(syscall(SYS_gettid)));
+#endif
+
 	Thread::ThreadPrivate::self->set(thread);
 
 	if (thread->getPriority() != ENormalPriority)
@@ -319,6 +364,9 @@ void Thread::dispatch(Thread *thread) {
 #endif
 	}
 
+	if (thread->getCoreAffinity() != -1)
+		thread->setCoreAffinity(thread->getCoreAffinity());
+
 	try {
 		thread->run();
 	} catch (std::exception &e) {
@@ -418,6 +466,7 @@ void Thread::staticInitialization() {
 #endif
 	detail::initializeGlobalTLS();
 	detail::initializeLocalTLS();
+	pthread_key_create(&__thread_id, NULL);
 
 	ThreadPrivate::self = new ThreadLocal<Thread>();
 	Thread *mainThread = new MainThread();
@@ -499,6 +548,7 @@ void Thread::initializeOpenMP(size_t threadCount) {
 
 			#if defined(__LINUX__)
 				prctl(PR_SET_NAME, threadName.c_str());
+				pthread_setspecific(__thread_id, reinterpret_cast<void *>(syscall(SYS_gettid)));
 			#elif defined(__OSX__)
 				pthread_setname_np(threadName.c_str());
 			#elif defined(__WINDOWS__)
diff --git a/src/libpython/core.cpp b/src/libpython/core.cpp
index 1ef776ff..70281f4e 100644
--- a/src/libpython/core.cpp
+++ b/src/libpython/core.cpp
@@ -638,6 +638,8 @@ void export_core() {
 		.def("getID", &Thread::getID)
 		.def("setPriority", &Thread::setPriority)
 		.def("getPriority", &Thread::getPriority)
+		.def("setCoreAffinity", &Thread::setCoreAffinity)
+		.def("getCoreAffinity", &Thread::getCoreAffinity)
 		.def("setCritical", &Thread::setCritical)
 		.def("getCritical", &Thread::getCritical)
 		.def("setName", &Thread::setName)
@@ -871,8 +873,8 @@ void export_core() {
 		.def("getCoreCount", &Worker::getCoreCount)
 		.def("isRemoteWorker", &Worker::isRemoteWorker);
 
-	BP_CLASS(LocalWorker, Worker, bp::init<const std::string>())
-		.def(bp::init<const std::string, Thread::EThreadPriority>());
+	BP_CLASS(LocalWorker, Worker, (bp::init<int, const std::string>()))
+		.def(bp::init<int, const std::string, Thread::EThreadPriority>());
 
 	BP_CLASS(RemoteWorker, Worker, (bp::init<const std::string, Stream *>()))
 		.def("getNodeName", &RemoteWorker::getNodeName, BP_RETURN_VALUE);
diff --git a/src/mitsuba/mitsuba.cpp b/src/mitsuba/mitsuba.cpp
index f06aa76d..4f003ab8 100644
--- a/src/mitsuba/mitsuba.cpp
+++ b/src/mitsuba/mitsuba.cpp
@@ -258,7 +258,7 @@ int mitsuba_app(int argc, char **argv) {
 		/* Configure the scheduling subsystem */
 		Scheduler *scheduler = Scheduler::getInstance();
 		for (int i=0; i<nprocs; ++i)
-			scheduler->registerWorker(new LocalWorker(formatString("wrk%i", i)));
+			scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
 		std::vector<std::string> hosts = tokenize(networkHosts, ";");
 
 		/* Establish network connections to nested servers */
diff --git a/src/mitsuba/mtssrv.cpp b/src/mitsuba/mtssrv.cpp
index a4625cd3..6101eec9 100644
--- a/src/mitsuba/mtssrv.cpp
+++ b/src/mitsuba/mtssrv.cpp
@@ -219,7 +219,7 @@ int mtssrv(int argc, char **argv) {
 		/* Configure the scheduling subsystem */
 		Scheduler *scheduler = Scheduler::getInstance();
 		for (int i=0; i<nprocs; ++i)
-			scheduler->registerWorker(new LocalWorker(formatString("wrk%i", i)));
+			scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
 		std::vector<std::string> hosts = tokenize(networkHosts, ";");
 
 		/* Establish network connections to nested servers */
diff --git a/src/mitsuba/mtsutil.cpp b/src/mitsuba/mtsutil.cpp
index b9365954..abf547fd 100644
--- a/src/mitsuba/mtsutil.cpp
+++ b/src/mitsuba/mtsutil.cpp
@@ -234,7 +234,7 @@ int mtsutil(int argc, char **argv) {
 		/* Configure the scheduling subsystem */
 		Scheduler *scheduler = Scheduler::getInstance();
 		for (int i=0; i<nprocs; ++i)
-			scheduler->registerWorker(new LocalWorker(formatString("wrk%i", i)));
+			scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", i)));
 		std::vector<std::string> hosts = tokenize(networkHosts, ";");
 
 		/* Establish network connections to nested servers */
diff --git a/src/mtsgui/mainwindow.cpp b/src/mtsgui/mainwindow.cpp
index 9ca8bf52..c96c2366 100644
--- a/src/mtsgui/mainwindow.cpp
+++ b/src/mtsgui/mainwindow.cpp
@@ -266,7 +266,7 @@ void MainWindow::initWorkers() {
 	m_workerPriority = (Thread::EThreadPriority)
 		settings.value("workerPriority", (int) Thread::ELowPriority).toInt();
 	for (int i=0; i<localWorkerCount; ++i)
-		scheduler->registerWorker(new LocalWorker(formatString("wrk%i", localWorkerCtr++), m_workerPriority));
+		scheduler->registerWorker(new LocalWorker(i, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
 
 	int networkConnections = 0;
 	QList<QVariant> connectionData = settings.value("connections").toList();
@@ -314,7 +314,7 @@ void MainWindow::initWorkers() {
 		QMessageBox::warning(this, tr("Scheduler warning"),
 			tr("There must be at least one worker thread -- forcing creation of one."),
 			QMessageBox::Ok);
-		scheduler->registerWorker(new LocalWorker(formatString("wrk%i", localWorkerCtr++), m_workerPriority));
+		scheduler->registerWorker(new LocalWorker(0, formatString("wrk%i", localWorkerCtr++), m_workerPriority));
 	}
 
 	QStringList args = qApp->arguments();
@@ -1312,7 +1312,8 @@ void MainWindow::on_actionSettings_triggered() {
 			ref<Scheduler> sched = Scheduler::getInstance();
 			sched->pause();
 			while (d.getLocalWorkerCount() > (int) localWorkers.size()) {
-				LocalWorker *worker = new LocalWorker(formatString("wrk%i", localWorkerCtr++), m_workerPriority);
+				LocalWorker *worker = new LocalWorker(localWorkerCtr, formatString("wrk%i", localWorkerCtr), m_workerPriority);
+				localWorkerCtr++;
 				sched->registerWorker(worker);
 				localWorkers.push_back(worker);
 			}

From c900bea6b6592d09718b44b1318f7273e623124e Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 12 Sep 2013 14:53:35 +0200
Subject: [PATCH 24/43] osx fixes

---
 src/libcore/thread.cpp | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/libcore/thread.cpp b/src/libcore/thread.cpp
index e4b85135..59486fbd 100644
--- a/src/libcore/thread.cpp
+++ b/src/libcore/thread.cpp
@@ -311,11 +311,13 @@ void Thread::setCoreAffinity(int coreID) {
 	if (!d->running)
 		return;
 
+#if defined(__OSX__)
+	/* CPU affinity not supported on OSX */
+#elif defined(__LINUX__)
 	int nCores = getCoreCount();
-#if defined(__LINUX__) || defined(__OSX__)
 	cpu_set_t *cpuset = CPU_ALLOC(nCores);
 	if (cpuset == NULL)
-		Log(EError, "Thread::setCoreAffinity: could not allocate cpu_set_t");
+		Log(EError, "Thread::setCoreAffinity(): could not allocate cpu_set_t");
 
 	size_t size = CPU_ALLOC_SIZE(nCores);
 	CPU_ZERO_S(size, cpuset);
@@ -328,8 +330,21 @@ void Thread::setCoreAffinity(int coreID) {
 
 	const pthread_t threadID = d->thread.native_handle();
 	if (pthread_setaffinity_np(threadID, size, cpuset) != 0)
-		Log(EWarn, "pthread_setaffinity_np: failed");
+		Log(EWarn, "Thread::setCoreAffinity(): pthread_setaffinity_np: failed");
 	CPU_FREE(cpuset);
+#elif defined(__WINDOWS__)
+	int nCores = getCoreCount();
+	const HANDLE handle = d->thread.native_handle();
+
+	DWORD_PTR mask;
+
+	if (coreID != -1 && coreID < nCores)
+		mask = (DWORD_PTR) 1 << coreID;
+	else
+		mask = (1 << nCores) - 1;
+
+	if (!SetThreadAffinityMask(handle, mask))
+		Log(EWarn, "Thread::setCoreAffinity(): SetThreadAffinityMask : failed");
 #endif
 }
 
@@ -458,15 +473,16 @@ int mts_omp_get_thread_num() {
 #endif
 
 void Thread::staticInitialization() {
-#if defined(__OSX__)
-	__mts_autorelease_init();
-#if defined(MTS_OPENMP)
-	__omp_threadCount = omp_get_max_threads();
-#endif
-#endif
+	#if defined(__OSX__)
+		__mts_autorelease_init();
+		#if defined(MTS_OPENMP)
+			__omp_threadCount = omp_get_max_threads();
+		#endif
+	#elif defined(__LINUX__)
+		pthread_key_create(&__thread_id, NULL);
+	#endif
 	detail::initializeGlobalTLS();
 	detail::initializeLocalTLS();
-	pthread_key_create(&__thread_id, NULL);
 
 	ThreadPrivate::self = new ThreadLocal<Thread>();
 	Thread *mainThread = new MainThread();
@@ -500,6 +516,9 @@ void Thread::staticShutdown() {
 	delete ThreadPrivate::self;
 	ThreadPrivate::self = NULL;
 	detail::destroyGlobalTLS();
+#if defined(__LINUX__)
+	pthread_key_delete(__thread_id);
+#endif
 #if defined(__OSX__)
 	#if defined(MTS_OPENMP)
 		if (__omp_key_created)

From b1928ed1f27245e47adcf84f1f23a8af078c3b34 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 12 Sep 2013 15:57:25 +0200
Subject: [PATCH 25/43] statistics: further performance improvements on osx

---
 src/libcore/thread.cpp | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/libcore/thread.cpp b/src/libcore/thread.cpp
index 59486fbd..97504e9c 100644
--- a/src/libcore/thread.cpp
+++ b/src/libcore/thread.cpp
@@ -18,6 +18,7 @@
 
 #include <mitsuba/core/lock.h>
 #include <mitsuba/core/fresolver.h>
+#include <mitsuba/core/atomic.h>
 #if defined(MTS_OPENMP)
 # include <omp.h>
 #endif
@@ -27,7 +28,6 @@
 // Required for native thread functions
 #if defined(__LINUX__)
 # include <sys/prctl.h>
-# include <sys/syscall.h>
 #elif defined(__OSX__)
 # include <pthread.h>
 #elif defined(__WINDOWS__)
@@ -69,8 +69,9 @@ void SetThreadName(const char* threadName, DWORD dwThreadID = -1) {
 } // namespace
 #endif // _MSC_VER
 
-#if defined(__LINUX__)
+#if defined(__LINUX__) || defined(__OSX__)
 static pthread_key_t __thread_id;
+static int __thread_id_ctr = 0;
 #endif
 
 /**
@@ -146,12 +147,10 @@ bool Thread::getCritical() const {
 int Thread::getID() {
 #if defined(__WINDOWS__)
 	return static_cast<int>(GetCurrentThreadId());
-#elif defined(__OSX__)
-	return static_cast<int>(pthread_mach_thread_np(pthread_self()));
-#else
+#elif defined(__OSX__) || defined(__LINUX__)
 	/* pthread_self() doesn't provide nice increasing IDs, and syscall(SYS_gettid)
-	   causes a context switch. Use a thread-local variable that caches the
-	   result of syscall(SYS_gettid) */
+	   causes a context switch. Thus, this function uses a thread-local variable
+	   to provide a nice linearly increasing sequence of thread IDs */
 	return static_cast<int>(reinterpret_cast<intptr_t>(pthread_getspecific(__thread_id)));
 #endif
 }
@@ -356,9 +355,18 @@ int Thread::getCoreAffinity() const {
 void Thread::dispatch(Thread *thread) {
 	detail::initializeLocalTLS();
 
-#if defined(__LINUX__)
-	pthread_setspecific(__thread_id, reinterpret_cast<void *>(syscall(SYS_gettid)));
-#endif
+	#if 0
+		#if defined(__LINUX__)
+			pthread_setspecific(__thread_id, reinterpret_cast<void *>(syscall(SYS_gettid)));
+		#elif defined(__OSX__)
+			pthread_setspecific(__thread_id, reinterpret_cast<void *>(pthread_mach_thread_np(pthread_self())));
+		#endif
+	#endif
+
+	#if defined(__LINUX__) || defined(__OSX__)
+		int id = atomicAdd(&__thread_id_ctr, 1);
+		pthread_setspecific(__thread_id, reinterpret_cast<void *>(id));
+	#endif
 
 	Thread::ThreadPrivate::self->set(thread);
 
@@ -478,7 +486,8 @@ void Thread::staticInitialization() {
 		#if defined(MTS_OPENMP)
 			__omp_threadCount = omp_get_max_threads();
 		#endif
-	#elif defined(__LINUX__)
+	#endif
+	#if defined(__LINUX__) || defined(__OSX__)
 		pthread_key_create(&__thread_id, NULL);
 	#endif
 	detail::initializeGlobalTLS();
@@ -516,7 +525,7 @@ void Thread::staticShutdown() {
 	delete ThreadPrivate::self;
 	ThreadPrivate::self = NULL;
 	detail::destroyGlobalTLS();
-#if defined(__LINUX__)
+#if defined(__LINUX__) || defined(__OSX__)
 	pthread_key_delete(__thread_id);
 #endif
 #if defined(__OSX__)
@@ -567,13 +576,17 @@ void Thread::initializeOpenMP(size_t threadCount) {
 
 			#if defined(__LINUX__)
 				prctl(PR_SET_NAME, threadName.c_str());
-				pthread_setspecific(__thread_id, reinterpret_cast<void *>(syscall(SYS_gettid)));
 			#elif defined(__OSX__)
 				pthread_setname_np(threadName.c_str());
 			#elif defined(__WINDOWS__)
 				SetThreadName(threadName.c_str());
 			#endif
 
+			#if defined(__LINUX__) || defined(__OSX__)
+				int id = atomicAdd(&__thread_id_ctr, 1);
+				pthread_setspecific(__thread_id, reinterpret_cast<void *>(id));
+			#endif
+
 			thread->d->running = false;
 			thread->d->joined = false;
 			thread->d->fresolver = fResolver;

From 139c358647d2bcf4e8e3ba6a5edcba5430282e39 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 12 Sep 2013 07:31:12 -0700
Subject: [PATCH 26/43] improved Thread::getID() on Windows

---
 src/libcore/thread.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/libcore/thread.cpp b/src/libcore/thread.cpp
index 97504e9c..74dafc1d 100644
--- a/src/libcore/thread.cpp
+++ b/src/libcore/thread.cpp
@@ -71,8 +71,10 @@ void SetThreadName(const char* threadName, DWORD dwThreadID = -1) {
 
 #if defined(__LINUX__) || defined(__OSX__)
 static pthread_key_t __thread_id;
-static int __thread_id_ctr = 0;
+#elif defined(__WINDOWS__)
+__declspec(thread) int __thread_id;
 #endif
+static int __thread_id_ctr = -1;
 
 /**
  * Internal Thread members
@@ -146,7 +148,7 @@ bool Thread::getCritical() const {
 
 int Thread::getID() {
 #if defined(__WINDOWS__)
-	return static_cast<int>(GetCurrentThreadId());
+	return __thread_id;
 #elif defined(__OSX__) || defined(__LINUX__)
 	/* pthread_self() doesn't provide nice increasing IDs, and syscall(SYS_gettid)
 	   causes a context switch. Thus, this function uses a thread-local variable
@@ -363,9 +365,11 @@ void Thread::dispatch(Thread *thread) {
 		#endif
 	#endif
 
+	int id = atomicAdd(&__thread_id_ctr, 1);
 	#if defined(__LINUX__) || defined(__OSX__)
-		int id = atomicAdd(&__thread_id_ctr, 1);
 		pthread_setspecific(__thread_id, reinterpret_cast<void *>(id));
+	#elif defined(__WINDOWS__)
+		__thread_id = id;
 	#endif
 
 	Thread::ThreadPrivate::self->set(thread);
@@ -582,9 +586,11 @@ void Thread::initializeOpenMP(size_t threadCount) {
 				SetThreadName(threadName.c_str());
 			#endif
 
+			int id = atomicAdd(&__thread_id_ctr, 1);
 			#if defined(__LINUX__) || defined(__OSX__)
-				int id = atomicAdd(&__thread_id_ctr, 1);
 				pthread_setspecific(__thread_id, reinterpret_cast<void *>(id));
+			#elif defined(__WINDOWS__)
+				__thread_id = id;
 			#endif
 
 			thread->d->running = false;

From 6bb4be1175a6003e0287d9dfbfd8004041b4faf7 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 13 Sep 2013 14:50:56 +0200
Subject: [PATCH 27/43] heightfield: added getNormalDerivative, createTriMesh

---
 src/shapes/heightfield.cpp | 127 +++++++++++++++++++++++++++++++++++--
 1 file changed, 120 insertions(+), 7 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 394be784..99697e66 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -304,9 +304,8 @@ public:
 			f11 = m_data[(y+1) * width + x + 1];
 
 		Point pLocal(temp.p.x + temp.x, temp.p.y + temp.y, temp.p.z);
-
+		its.uv = Point2(pLocal.x * m_invSize.x, pLocal.y * m_invSize.y);
 		its.p  = m_objectToWorld(pLocal);
-		its.uv = Point2(pLocal.x * m_invSize.x, pLocal.y / m_invSize.y);
 		its.dpdu = m_objectToWorld(Vector(1, 0,
 			(1.0f - temp.p.y) * (f10 - f00) + temp.p.y * (f11 - f01)) * m_levelSize[0].x);
 		its.dpdv = m_objectToWorld(Vector(0, 1,
@@ -317,11 +316,11 @@ public:
 		its.geoFrame.n = cross(its.geoFrame.s, its.geoFrame.t);
 
 		if (m_shadingNormals) {
-			Normal
-				n00 = m_normals[y     * width + x],
-				n01 = m_normals[(y+1) * width + x],
-				n10 = m_normals[y     * width + x + 1],
-				n11 = m_normals[(y+1) * width + x + 1];
+			const Normal
+				&n00 = m_normals[y     * width + x],
+				&n01 = m_normals[(y+1) * width + x],
+				&n10 = m_normals[y     * width + x + 1],
+				&n11 = m_normals[(y+1) * width + x + 1];
 
 			its.shFrame.n = normalize(m_objectToWorld(Normal(
 				(1 - temp.p.x) * ((1-temp.p.y) * n00 + temp.p.y * n01)
@@ -343,6 +342,7 @@ public:
  		its.hasUVPartials = false;
 		its.instance = NULL;
 		its.time = ray.time;
+		its.primIndex = x + y*width;
 	}
 
 	bool rayIntersect(const Ray &ray, Float mint, Float maxt) const {
@@ -350,6 +350,57 @@ public:
 		return rayIntersect(ray, mint, maxt, t, NULL);
 	}
 
+	void getNormalDerivative(const Intersection &its,
+			Vector &dndu, Vector &dndv, bool shadingFrame) const {
+		int width = m_dataSize.x,
+		    x = its.primIndex % width,
+		    y = its.primIndex / width;
+
+		Float u = its.uv.x * m_levelSize[0].x - x;
+		Float v = its.uv.y * m_levelSize[0].y - y;
+
+		Normal normal;
+		if (shadingFrame && m_shadingNormals) {
+			/* Derivatives for bilinear patch with interpolated shading normals */
+			const Normal
+				&n00 = m_normals[y     * width + x],
+				&n01 = m_normals[(y+1) * width + x],
+				&n10 = m_normals[y     * width + x + 1],
+				&n11 = m_normals[(y+1) * width + x + 1];
+
+			normal = m_objectToWorld(Normal(
+				(1 - u) * ((1-v) * n00 + v * n01)
+				   + u  * ((1-v) * n10 + v * n11)));
+
+			dndu = m_objectToWorld(Normal((1.0f - v) * (n10 - n00) + v * (n11 - n01))) * m_levelSize[0].x;
+			dndv = m_objectToWorld(Normal((1.0f - u) * (n01 - n00) + u * (n11 - n10))) * m_levelSize[0].y;
+		} else {
+			/* Derivatives for bilinear patch with geometric normals */
+			Float
+				f00 = m_data[y     * width + x],
+				f01 = m_data[(y+1) * width + x],
+				f10 = m_data[y     * width + x + 1],
+				f11 = m_data[(y+1) * width + x + 1];
+
+			normal = m_objectToWorld(
+				Normal(f00 - f10 + (f01 + f10 - f00 - f11)*v,
+					   f00 - f01 + (f01 + f10 - f00 - f11)*u, 1));
+
+			dndu = m_objectToWorld(Normal(0, f01 + f10 - f00 - f11, 0)) * m_levelSize[0].x;
+			dndv = m_objectToWorld(Normal(f01 + f10 - f00 - f11, 0, 0)) * m_levelSize[0].y;
+		}
+
+		/* Account for normalization */
+		Float invLength = 1/normal.length();
+
+		normal *= invLength;
+		dndu *= invLength;
+		dndv *= invLength;
+
+		dndu -= dot(normal, dndu) * normal;
+		dndv -= dot(normal, dndv) * normal;
+	}
+
 	void addChild(const std::string &name, ConfigurableObject *child) {
 		const Class *cClass = child->getClass();
 		if (cClass->derivesFrom(Texture::m_theClass)) {
@@ -507,6 +558,68 @@ public:
 		);
 	}
 
+	ref<TriMesh> createTriMesh() {
+		Vector2i size = m_dataSize;
+
+		/* Limit the size of the mesh */
+		while (size.x > 256 && size.y > 256) {
+			size.x = std::max(size.x / 2, 2);
+			size.y = std::max(size.y / 2, 2);
+		}
+
+		size_t numTris = 2 * (size_t) (size.x-1) * (size_t) (size.y-1);
+		size_t numVertices = (size_t) size.x * (size_t) size.y;
+
+		ref<TriMesh> mesh = new TriMesh("Height field approximation",
+			numTris, numVertices, false, true, false, false, !m_shadingNormals);
+
+		Point *vertices = mesh->getVertexPositions();
+		Point2 *texcoords = mesh->getVertexTexcoords();
+		Triangle *triangles = mesh->getTriangles();
+
+		Float dx = (Float) 1 / (size.x - 1);
+		Float dy = (Float) 1 / (size.y - 1);
+		Float scaleX = (Float) m_dataSize.x / size.x;
+		Float scaleY = (Float) m_dataSize.y / size.y;
+
+		uint32_t vertexIdx = 0;
+		for (int y=0; y<size.y; ++y) {
+			int py = std::min((int) (scaleY * y), m_dataSize.y-1);
+			for (int x=0; x<size.x; ++x) {
+				int px = std::min((int) (scaleX * x), m_dataSize.x-1);
+				texcoords[vertexIdx] = Point2(x*dx, y*dy);
+				vertices[vertexIdx++] = m_objectToWorld(Point(px, py,
+					m_data[px + py*m_dataSize.x]));
+			}
+		}
+		Assert(vertexIdx == numVertices);
+
+		uint32_t triangleIdx = 0;
+		for (int y=1; y<size.y; ++y) {
+			for (int x=0; x<size.x-1; ++x) {
+				uint32_t nextx = x + 1;
+				uint32_t idx0 = size.x*y + x;
+				uint32_t idx1 = size.x*y + nextx;
+				uint32_t idx2 = size.x*(y-1) + x;
+				uint32_t idx3 = size.x*(y-1) + nextx;
+
+				triangles[triangleIdx].idx[0] = idx0;
+				triangles[triangleIdx].idx[1] = idx2;
+				triangles[triangleIdx].idx[2] = idx1;
+				triangleIdx++;
+				triangles[triangleIdx].idx[0] = idx1;
+				triangles[triangleIdx].idx[1] = idx2;
+				triangles[triangleIdx].idx[2] = idx3;
+				triangleIdx++;
+			}
+		}
+		Assert(triangleIdx == numTris);
+		mesh->copyAttachments(this);
+		mesh->configure();
+
+		return mesh.get();
+	}
+
 	std::string toString() const {
 		std::ostringstream oss;
 		oss << "HeightField[" << endl

From 6d52ca9355e5202e39853e515ac6b7ac9cca8e3f Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 17 Sep 2013 15:21:25 +0200
Subject: [PATCH 28/43] minor parameter constness fix

---
 include/mitsuba/core/spline.h | 6 +++---
 src/libcore/spline.cpp        | 4 ++--
 src/libcore/util.cpp          | 1 -
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/mitsuba/core/spline.h b/include/mitsuba/core/spline.h
index f18591b5..95dbf13d 100644
--- a/include/mitsuba/core/spline.h
+++ b/include/mitsuba/core/spline.h
@@ -156,7 +156,7 @@ extern MTS_EXPORT_CORE Float integrateCubicInterp1DN(size_t idx,
  * \return
  *      The sampled position
  */
-extern MTS_EXPORT_CORE Float sampleCubicInterp1D(size_t idx, Float *values,
+extern MTS_EXPORT_CORE Float sampleCubicInterp1D(size_t idx, const Float *values,
 		size_t size, Float min, Float max, Float sample, Float *fval = NULL);
 
 /**
@@ -182,8 +182,8 @@ extern MTS_EXPORT_CORE Float sampleCubicInterp1D(size_t idx, Float *values,
  * \return
  *      The sampled position
  */
-extern MTS_EXPORT_CORE Float sampleCubicInterp1DN(size_t idx, Float *nodes,
-		Float *values, size_t size, Float sample, Float *fval = NULL);
+extern MTS_EXPORT_CORE Float sampleCubicInterp1DN(size_t idx, const Float *nodes,
+		const Float *values, size_t size, Float sample, Float *fval = NULL);
 
 /**
  * \brief Evaluate a cubic spline interpolant of a uniformly sampled 2D function
diff --git a/src/libcore/spline.cpp b/src/libcore/spline.cpp
index 9c137c72..f6621bed 100644
--- a/src/libcore/spline.cpp
+++ b/src/libcore/spline.cpp
@@ -130,7 +130,7 @@ Float integrateCubicInterp1DN(size_t idx, const Float *nodes, const Float *value
 	return ((d0-d1) * (Float) (1.0 / 12.0) + (f0+f1) * 0.5f) * width;
 }
 
-Float sampleCubicInterp1D(size_t idx, Float *values, size_t size, Float min,
+Float sampleCubicInterp1D(size_t idx, const Float *values, size_t size, Float min,
 		Float max, Float sample, Float *fval) {
 	Float f0 = values[idx], f1 = values[idx+1], d0, d1;
 
@@ -180,7 +180,7 @@ Float sampleCubicInterp1D(size_t idx, Float *values, size_t size, Float min,
 	}
 }
 
-Float sampleCubicInterp1DN(size_t idx, Float *nodes, Float *values,
+Float sampleCubicInterp1DN(size_t idx, const Float *nodes, const Float *values,
 		size_t size, Float sample, Float *fval) {
 	Float f0       = values[idx],
 	      f1       = values[idx+1],
diff --git a/src/libcore/util.cpp b/src/libcore/util.cpp
index 6a90128b..3d321cd4 100644
--- a/src/libcore/util.cpp
+++ b/src/libcore/util.cpp
@@ -834,7 +834,6 @@ std::string memString(size_t size, bool precise) {
 	return os.str();
 }
 
-
 Float hypot2(Float a, Float b) {
 	Float r;
 	if (std::abs(a) > std::abs(b)) {

From 773085525f5b274a39945aa2cc01edaba842c841 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Wed, 25 Sep 2013 15:12:24 +0200
Subject: [PATCH 29/43] minor double precision compilation fix

---
 src/shapes/heightfield.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 99697e66..26b246c1 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -494,7 +494,7 @@ public:
 				m_levelSize[0].x / cur.x,
 				m_levelSize[0].y / cur.y
 			);
-			m_blockSizeF[level] = Vector2f(m_blockSize[level]);
+			m_blockSizeF[level] = Vector2(m_blockSize[level]);
 
 			/* Allocate memory for interval data */
 			Interval *prevBounds = m_minmax[level-1], *curBounds;

From ab4525afba1a6622ceb195108e6afa7c4c35914d Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 26 Sep 2013 16:45:28 +0200
Subject: [PATCH 30/43] photonmapper: fixed shading on the backside of diffuse
 surfaces

---
 src/integrators/photonmapper/photonmapper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/integrators/photonmapper/photonmapper.cpp b/src/integrators/photonmapper/photonmapper.cpp
index a1849833..9b205b67 100644
--- a/src/integrators/photonmapper/photonmapper.cpp
+++ b/src/integrators/photonmapper/photonmapper.cpp
@@ -445,7 +445,7 @@ public:
 		/* Exhaustively recurse into all specular lobes? */
 		bool exhaustiveSpecular = rRec.depth < m_maxSpecularDepth && !cacheQuery;
 
-		if (isDiffuse) {
+		if (isDiffuse && (dot(its.shFrame.n, ray.d) < 0 || (bsdf->getType() & BSDF::EBackSide))) {
 			/* 1. Diffuse indirect */
 			int maxDepth = m_maxDepth == -1 ? INT_MAX : (m_maxDepth-rRec.depth);
 			if (rRec.type & RadianceQueryRecord::EIndirectSurfaceRadiance && m_globalPhotonMap.get())

From 126aa5e885ef82221d9957e465f187a37110c5c6 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 27 Sep 2013 15:40:53 +0200
Subject: [PATCH 31/43] libpython: initial support for emitters

---
 src/libpython/render.cpp | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/libpython/render.cpp b/src/libpython/render.cpp
index 9b9f4bcf..be6d4c22 100644
--- a/src/libpython/render.cpp
+++ b/src/libpython/render.cpp
@@ -351,7 +351,45 @@ void export_render() {
 		.def("serialize", triMesh_serialize2)
 		.def("writeOBJ", &TriMesh::writeOBJ);
 
-	BP_CLASS(Sensor, ConfigurableObject, bp::no_init) // incomplete
+	Shape *(AbstractEmitter::*abstractemitter_getShape)(void) = &AbstractEmitter::getShape;
+	Medium *(AbstractEmitter::*abstractemitter_getMedium)(void) = &AbstractEmitter::getMedium;
+
+	BP_CLASS(AbstractEmitter, ConfigurableObject, bp::no_init)
+		.def("getType", &AbstractEmitter::getType)
+		.def("setWorldTransform", &AbstractEmitter::setWorldTransform)
+		.def("getWorldTransform", &AbstractEmitter::getWorldTransform, BP_RETURN_VALUE)
+		.def("isOnSurface", &AbstractEmitter::isOnSurface)
+		.def("needsPositionSample", &AbstractEmitter::needsPositionSample)
+		.def("needsDirectionSample", &AbstractEmitter::needsDirectionSample)
+		.def("needsDirectSample", &AbstractEmitter::needsDirectSample)
+		.def("getDirectMeasure", &AbstractEmitter::getDirectMeasure)
+		.def("isDegenerate", &AbstractEmitter::isDegenerate)
+		.def("getShape", abstractemitter_getShape, BP_RETURN_VALUE)
+		.def("getMedium", abstractemitter_getMedium, BP_RETURN_VALUE)
+		.def("createShape", &AbstractEmitter::createShape, BP_RETURN_VALUE)
+		.def("getAABB", &AbstractEmitter::getAABB, BP_RETURN_VALUE);
+
+	BP_SETSCOPE(AbstractEmitter_class);
+	bp::enum_<AbstractEmitter::EEmitterType>("EEmitterType")
+		.value("EDeltaDirection,", AbstractEmitter::EDeltaDirection)
+		.value("EDeltaPosition,", AbstractEmitter::EDeltaPosition)
+		.value("EOnSurface,", AbstractEmitter::EOnSurface)
+		.export_values();
+	BP_SETSCOPE(renderModule);
+
+	BP_CLASS(Emitter, AbstractEmitter, bp::no_init) // incomplete
+		.def("eval", &Emitter::eval, BP_RETURN_VALUE)
+		.def("getSamplingWeight", &Emitter::getSamplingWeight)
+		.def("isEnvironmentEmitter", &Emitter::isEnvironmentEmitter)
+		.def("evalEnvironment", &Emitter::evalEnvironment, BP_RETURN_VALUE);
+
+	BP_SETSCOPE(Emitter_class);
+	bp::enum_<Emitter::EEmitterFlags>("EEmitterFlags")
+		.value("EEnvironmentEmitter,", Emitter::EEnvironmentEmitter)
+		.export_values();
+	BP_SETSCOPE(renderModule);
+
+	BP_CLASS(Sensor, AbstractEmitter, bp::no_init) // incomplete
 		.def("getShutterOpen", &Sensor::getShutterOpen)
 		.def("setShutterOpen", &Sensor::setShutterOpen)
 		.def("getShutterOpenTime", &Sensor::getShutterOpenTime)

From 24720d5bd3961c4e8899015005b2bc370b6b7a38 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Mon, 30 Sep 2013 11:16:26 +0200
Subject: [PATCH 32/43] Fixes bug #203: Fog material reference inside sensor
 disappears

---
 src/mtsgui/save.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/mtsgui/save.cpp b/src/mtsgui/save.cpp
index 3be9f008..5eeee778 100644
--- a/src/mtsgui/save.cpp
+++ b/src/mtsgui/save.cpp
@@ -231,6 +231,16 @@ void saveScene(QWidget *parent, SceneContext *ctx, const QString &targetFile) {
 	setProperties(ctx->doc, rfilter,
 		ctx->scene->getFilm()->getReconstructionFilter()->getProperties());
 
+	// ====================================================================
+	//   Serialize medium references of the sensor
+	// ====================================================================
+
+	QList<QDomElement> oldSensorReferences = findAllChildren(oldSensor, "ref");
+	oldSensorReferences.append(findAllChildren(oldSensor, "medium"));
+
+	for (int i=0; i<oldSensorReferences.size(); ++i)
+		sensor.appendChild(ctx->doc.importNode(oldSensorReferences[i], true));
+
 	// ====================================================================
 	//   Serialize the integrator configuration
 	// ====================================================================

From f02f0ac8015163be0c67ca0cdb74c4d23a630f8b Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Mon, 30 Sep 2013 12:20:06 +0200
Subject: [PATCH 33/43] libpython: Film support

---
 src/libpython/render.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/libpython/render.cpp b/src/libpython/render.cpp
index be6d4c22..e25b0726 100644
--- a/src/libpython/render.cpp
+++ b/src/libpython/render.cpp
@@ -395,6 +395,26 @@ void export_render() {
 		.def("getShutterOpenTime", &Sensor::getShutterOpenTime)
 		.def("setShutterOpenTime", &Sensor::setShutterOpenTime);
 
+	void (Film::*film_develop1)(const Scene *scene, Float renderTime) = &Film::develop;
+	bool (Film::*film_develop2)(const Point2i &offset, const Vector2i &size,
+		const Point2i &targetOffset, Bitmap *target) const = &Film::develop;
+	ReconstructionFilter *(Film::*film_getreconstructionfilter)() = &Film::getReconstructionFilter;
+
+	BP_CLASS(Film, ConfigurableObject, bp::no_init)
+		.def("getSize", &Film::getSize, BP_RETURN_VALUE)
+		.def("getCropSize", &Film::getCropSize, BP_RETURN_VALUE)
+		.def("getCropOffset", &Film::getCropOffset, BP_RETURN_VALUE)
+		.def("clear", &Film::clear)
+		.def("setBitmap", &Film::setBitmap)
+		.def("addBitmap", &Film::addBitmap)
+		.def("setDestinationFile", &Film::setDestinationFile)
+		.def("develop", film_develop1)
+		.def("develop", film_develop2)
+		.def("destinationExists", &Film::destinationExists)
+		.def("hasHighQualityEdges", &Film::hasHighQualityEdges)
+		.def("hasAlpha", &Film::hasAlpha)
+		.def("getReconstructionFilter", film_getreconstructionfilter, BP_RETURN_VALUE);
+
 	void (ProjectiveCamera::*projectiveCamera_setWorldTransform1)(const Transform &) = &ProjectiveCamera::setWorldTransform;
 	void (ProjectiveCamera::*projectiveCamera_setWorldTransform2)(AnimatedTransform *) = &ProjectiveCamera::setWorldTransform;
 	const Transform (ProjectiveCamera::*projectiveCamera_getWorldTransform1)(Float t) const = &ProjectiveCamera::getWorldTransform;

From cd435d81669f845216696c4426bce9b2e423e834 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 1 Oct 2013 12:21:21 +0200
Subject: [PATCH 34/43] fix an issue where the reinhard tonemapper reacted to
 the rendering progress indicators

---
 src/mtsgui/mainwindow.cpp            | 28 ++++++++++++++++++----------
 src/mtsgui/shaders/logluminance.frag |  5 +++--
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/mtsgui/mainwindow.cpp b/src/mtsgui/mainwindow.cpp
index c96c2366..fa5305b1 100644
--- a/src/mtsgui/mainwindow.cpp
+++ b/src/mtsgui/mainwindow.cpp
@@ -1727,6 +1727,9 @@ void MainWindow::onWorkBegin(const RenderJob *job, const RectangularWorkUnit *wu
 void MainWindow::drawVisualWorkUnit(SceneContext *context, const VisualWorkUnit &vwu) {
 	int ox = vwu.offset.x, oy = vwu.offset.y,
 		ex = ox + vwu.size.x, ey = oy + vwu.size.y;
+	if (vwu.size.x < 3 || vwu.size.y < 3)
+		return;
+
 	const float *color = NULL;
 
 	/* Use desaturated colors to highlight the host
@@ -1754,17 +1757,22 @@ void MainWindow::drawVisualWorkUnit(SceneContext *context, const VisualWorkUnit
 			break;
 	}
 
-	if (vwu.size.x < 3 || vwu.size.y < 3)
-		return;
+	float scale = .7f * (color[0] * 0.212671f + color[1] * 0.715160f + color[2] * 0.072169f);
 
-	drawHLine(context, ox, oy, ox + 3, color);
-	drawHLine(context, ex - 4, oy, ex - 1, color);
-	drawHLine(context, ox, ey - 1, ox + 3, color);
-	drawHLine(context, ex - 4, ey - 1, ex - 1, color);
-	drawVLine(context, ox, oy, oy + 3, color);
-	drawVLine(context, ex - 1, oy, oy + 3, color);
-	drawVLine(context, ex - 1, ey - 4, ey - 1, color);
-	drawVLine(context, ox, ey - 4, ey - 1, color);
+	float ncol[3] = {
+		color[0]*scale,
+		color[1]*scale,
+		color[2]*scale
+	};
+
+	drawHLine(context, ox, oy, ox + 3, ncol);
+	drawHLine(context, ex - 4, oy, ex - 1, ncol);
+	drawHLine(context, ox, ey - 1, ox + 3, ncol);
+	drawHLine(context, ex - 4, ey - 1, ex - 1, ncol);
+	drawVLine(context, ox, oy, oy + 3, ncol);
+	drawVLine(context, ex - 1, oy, oy + 3, ncol);
+	drawVLine(context, ex - 1, ey - 4, ey - 1, ncol);
+	drawVLine(context, ox, ey - 4, ey - 1, ncol);
 }
 
 void MainWindow::onWorkCanceled(const RenderJob *job, const Point2i &offset, const Vector2i &size) {
diff --git a/src/mtsgui/shaders/logluminance.frag b/src/mtsgui/shaders/logluminance.frag
index 1f6a9d5e..071968be 100644
--- a/src/mtsgui/shaders/logluminance.frag
+++ b/src/mtsgui/shaders/logluminance.frag
@@ -3,9 +3,10 @@ uniform float multiplier;
 
 void main() {
 	vec4 color = texture2D(source, gl_TexCoord[0].xy);
-	float luminance = multiplier * (color.r * 0.212671 + color.g * 0.715160 + color.b * 0.072169);
-	if (luminance < 0.0 || luminance != luminance || luminance == 1024.0)
+	float luminance = color.r * 0.212671 + color.g * 0.715160 + color.b * 0.072169;
+	if (luminance < 0.0 || luminance != luminance || luminance == 1024.0 || abs(luminance-.7) < 1e-4)
 		luminance = 0.0; // catch NaNs, negatives, and the Mitsuba banner
+	luminance = luminance * multiplier;
 	float logLuminance = log(1e-3 + luminance);
 	gl_FragColor = vec4(logLuminance, luminance, 0.0, 1.0);
 }

From 8ec217df22179b07885be89c6d3de6488cb20027 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 3 Oct 2013 22:57:51 +0200
Subject: [PATCH 35/43] new clang 10.8 compiler setting for 64bit

---
 build/config-macos10.8-gcc-x86_64.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 build/config-macos10.8-gcc-x86_64.py

diff --git a/build/config-macos10.8-gcc-x86_64.py b/build/config-macos10.8-gcc-x86_64.py
new file mode 100644
index 00000000..f7ea15b3
--- /dev/null
+++ b/build/config-macos10.8-gcc-x86_64.py
@@ -0,0 +1,27 @@
+BUILDDIR       = '#build/release'
+DISTDIR        = '#Mitsuba.app'
+CXX            = 'clang++'
+CC             = 'clang'
+CCFLAGS        = ['-arch', 'x86_64', '-mmacosx-version-min=10.8', '-march=nocona', '-msse2', '-ftree-vectorize', '-funsafe-math-optimizations', '-fno-math-errno', '-isysroot', '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.8.sdk', '-O3', '-Wall', '-g', '-pipe', '-DMTS_DEBUG', '-DSINGLE_PRECISION', '-DSPECTRUM_SAMPLES=3', '-DMTS_SSE', '-DMTS_HAS_COHERENT_RT', '-fstrict-aliasing', '-fsched-interblock', '-freorder-blocks', '-fvisibility=hidden', '-ftemplate-depth=512']
+LINKFLAGS      = ['-framework', 'OpenGL', '-framework', 'Cocoa', '-arch', 'x86_64', '-mmacosx-version-min=10.8', '-Wl,-syslibroot,/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.8.sdk', '-Wl,-headerpad,128']
+BASEINCLUDE    = ['#include', '#dependencies/include']
+BASELIBDIR     = ['#dependencies/lib']
+BASELIB        = ['m', 'pthread', 'Half']
+OEXRINCLUDE    = ['#dependencies/include/OpenEXR']
+OEXRLIB        = ['IlmImf', 'Imath', 'Iex', 'z']
+PNGLIB         = ['png']
+JPEGLIB        = ['jpeg']
+XERCESLIB      = ['xerces-c']
+GLLIB          = ['GLEWmx', 'objc']
+GLFLAGS        = ['-DGLEW_MX']
+BOOSTINCLUDE   = ['#dependencies']
+BOOSTLIB       = ['boost_filesystem', 'boost_system', 'boost_thread']
+PYTHON26INCLUDE= ['/System/Library/Frameworks/Python.framework/Versions/2.6/Headers']
+PYTHON26LIBDIR = ['/System/Library/Frameworks/Python.framework/Versions/2.6/lib']
+PYTHON26LIB    = ['boost_python26', 'boost_system', 'python2.6']
+PYTHON27INCLUDE= ['/System/Library/Frameworks/Python.framework/Versions/2.7/Headers']
+PYTHON27LIBDIR = ['/System/Library/Frameworks/Python.framework/Versions/2.7/lib']
+PYTHON27LIB    = ['boost_python27', 'boost_system', 'python2.7']
+COLLADAINCLUDE = ['#dependencies/include/collada-dom', '#dependencies/include/collada-dom/1.4']
+COLLADALIB     = ['collada14dom23']
+QTDIR          = '#dependencies'

From b6457f1bdbba3ad54cb65743b30aae9cd967634f Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 4 Oct 2013 17:37:30 +0200
Subject: [PATCH 36/43] added python v3.3 to SConscript.configure

---
 build/SConscript.configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/SConscript.configure b/build/SConscript.configure
index 73d917be..9d4c1517 100644
--- a/build/SConscript.configure
+++ b/build/SConscript.configure
@@ -30,7 +30,7 @@ if needsBuildDependencies and not os.path.exists(GetBuildPath('#dependencies')):
 	print 'at http://www.mitsuba-renderer.org/docs.html for details on how to get them.\n'
 	Exit(1)
 
-python_versions = ["2.6", "2.7", "3.0", "3.1", "3.2"]
+python_versions = ["2.6", "2.7", "3.0", "3.1", "3.2", "3.3"]
 
 # Parse configuration options
 vars = Variables(configFile)

From 9c04346391ad6e60d99105d75bb339c563919a6b Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 4 Oct 2013 17:47:40 +0200
Subject: [PATCH 37/43] updated boost python naming scheme to match current
 Debian/Ubuntu releases

---
 build/config-linux-gcc-debug.py | 2 +-
 build/config-linux-gcc.py       | 2 +-
 build/config-linux-icl.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/build/config-linux-gcc-debug.py b/build/config-linux-gcc-debug.py
index 109f06d8..cd6ce15d 100644
--- a/build/config-linux-gcc-debug.py
+++ b/build/config-linux-gcc-debug.py
@@ -31,7 +31,7 @@ pyver = os.popen("python --version 2>&1 | grep -oE '([[:digit:]].[[:digit:]])'")
 env = locals()
 
 env['PYTHON'+pyver+'INCLUDE']  = []
-env['PYTHON'+pyver+'LIB']      = ['boost_python3' if pyver[0] == '3' else 'boost_python']
+env['PYTHON'+pyver+'LIB']      = ['boost_python-mt-py'+pyver]
 
 for entry in os.popen("python-config --cflags --libs").read().split():
 	if entry[:2] == '-I':
diff --git a/build/config-linux-gcc.py b/build/config-linux-gcc.py
index d81b3408..0567f27c 100644
--- a/build/config-linux-gcc.py
+++ b/build/config-linux-gcc.py
@@ -31,7 +31,7 @@ pyver = os.popen("python --version 2>&1 | grep -oE '([[:digit:]].[[:digit:]])'")
 env = locals()
 
 env['PYTHON'+pyver+'INCLUDE']  = []
-env['PYTHON'+pyver+'LIB']      = ['boost_python3' if pyver[0] == '3' else 'boost_python']
+env['PYTHON'+pyver+'LIB']      = ['boost_python-mt-py'+pyver]
 
 for entry in os.popen("python-config --cflags --libs").read().split():
 	if entry[:2] == '-I':
diff --git a/build/config-linux-icl.py b/build/config-linux-icl.py
index 10baef50..f1b12fc2 100644
--- a/build/config-linux-icl.py
+++ b/build/config-linux-icl.py
@@ -31,7 +31,7 @@ pyver = os.popen("python --version 2>&1 | grep -oE '([[:digit:]].[[:digit:]])'")
 env = locals()
 
 env['PYTHON'+pyver+'INCLUDE']  = []
-env['PYTHON'+pyver+'LIB']      = ['boost_python3' if pyver[0] == '3' else 'boost_python']
+env['PYTHON'+pyver+'LIB']      = ['boost_python-mt-py'+pyver]
 
 for entry in os.popen("python-config --cflags --libs").read().split():
 	if entry[:2] == '-I':

From e8bc5aeb2ad2a0058ca43b8332ce5fee79c3cf14 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 4 Oct 2013 18:51:19 +0200
Subject: [PATCH 38/43] added heightfield primitive to CMakeLists

---
 src/shapes/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/shapes/CMakeLists.txt b/src/shapes/CMakeLists.txt
index 6c73ab39..526ca7fb 100644
--- a/src/shapes/CMakeLists.txt
+++ b/src/shapes/CMakeLists.txt
@@ -19,6 +19,7 @@ add_shape(cube       cube.cpp)
 add_shape(hair       hair.h hair.cpp)
 add_shape(shapegroup shapegroup.h shapegroup.cpp)
 add_shape(instance   instance.h instance.cpp)
+add_shape(heightfield heightfield.cpp)
 #add_shape(deformable deformable.cpp)
 add_shape(ply ply.cpp ply/ply_parser.cpp 
   ply/byte_order.hpp ply/config.hpp ply/io_operators.hpp

From 8ef468b2bc7d1de7dc119e9c31be725d494574c9 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 10 Oct 2013 15:22:31 +0200
Subject: [PATCH 39/43] heightfield: compute normals in parallel

---
 src/shapes/heightfield.cpp | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index 26b246c1..d2d87dbc 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -527,20 +527,28 @@ public:
 			memset(m_normals, 0, size);
 			storageSize += size;
 
-			for (int y=0; y<m_levelSize[0].y; ++y) {
-				for (int x=0; x<m_levelSize[0].x; ++x) {
-					Float f00 = m_data[y * m_dataSize.x + x];
-					Float f10 = m_data[y * m_dataSize.x + x + 1];
-					Float f01 = m_data[(y + 1) * m_dataSize.x + x];
-					Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
+			for (int offset=0; offset<1; ++offset) {
+				#if defined(MTS_OPENMP)
+					#pragma omp parallel for
+				#endif
+				for (int y=offset; y<m_levelSize[0].y; y+=2) {
+					for (int x=0; x<m_levelSize[0].x; ++x) {
+							Float f00 = m_data[y * m_dataSize.x + x];
+						Float f10 = m_data[y * m_dataSize.x + x + 1];
+						Float f01 = m_data[(y + 1) * m_dataSize.x + x];
+						Float f11 = m_data[(y + 1) * m_dataSize.x + x + 1];
 
-					m_normals[y       * m_dataSize.x + x]     += normalize(Normal(f00 - f10, f00 - f01, 1));
-					m_normals[y       * m_dataSize.x + x + 1] += normalize(Normal(f00 - f10, f10 - f11, 1));
-					m_normals[(y + 1) * m_dataSize.x + x]     += normalize(Normal(f01 - f11, f00 - f01, 1));
-					m_normals[(y + 1) * m_dataSize.x + x + 1] += normalize(Normal(f01 - f11, f10 - f11, 1));
+						m_normals[y       * m_dataSize.x + x]     += normalize(Normal(f00 - f10, f00 - f01, 1));
+						m_normals[y       * m_dataSize.x + x + 1] += normalize(Normal(f00 - f10, f10 - f11, 1));
+						m_normals[(y + 1) * m_dataSize.x + x]     += normalize(Normal(f01 - f11, f00 - f01, 1));
+						m_normals[(y + 1) * m_dataSize.x + x + 1] += normalize(Normal(f01 - f11, f10 - f11, 1));
+					}
 				}
 			}
 
+			#if defined(MTS_OPENMP)
+				#pragma omp parallel for
+			#endif
 			for (int y=0; y<m_dataSize.y; ++y) {
 				for (int x=0; x<m_dataSize.x; ++x) {
 					Normal &normal = m_normals[x + y * m_dataSize.x];

From f831e0ba9966b7a82f2a340bb63e18b22626be56 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Thu, 10 Oct 2013 15:32:01 +0200
Subject: [PATCH 40/43] bugfix

---
 src/shapes/heightfield.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/shapes/heightfield.cpp b/src/shapes/heightfield.cpp
index d2d87dbc..16deafab 100644
--- a/src/shapes/heightfield.cpp
+++ b/src/shapes/heightfield.cpp
@@ -527,7 +527,7 @@ public:
 			memset(m_normals, 0, size);
 			storageSize += size;
 
-			for (int offset=0; offset<1; ++offset) {
+			for (int offset=0; offset<2; ++offset) {
 				#if defined(MTS_OPENMP)
 					#pragma omp parallel for
 				#endif

From 6baac6fb73ab59e7a8071880ec1406133daebc96 Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Tue, 15 Oct 2013 15:02:29 +0200
Subject: [PATCH 41/43] disk/rectangle shape: be less paranoid about shear

---
 src/shapes/disk.cpp      | 2 +-
 src/shapes/rectangle.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/shapes/disk.cpp b/src/shapes/disk.cpp
index 7d575ebc..922f8e83 100644
--- a/src/shapes/disk.cpp
+++ b/src/shapes/disk.cpp
@@ -105,7 +105,7 @@ public:
 		Vector dpdu = trafo(Vector(1, 0, 0));
 		Vector dpdv = trafo(Vector(0, 1, 0));
 
-		if (std::abs(dot(dpdu, dpdv)) > 1e-3f)
+		if (std::abs(dot(normalize(dpdu), normalize(dpdv))) > 1e-3f)
 			Log(EError, "Error: 'toWorld' transformation contains shear!");
 
 		if (std::abs(dpdu.length() / dpdv.length() - 1) > 1e-3f)
diff --git a/src/shapes/rectangle.cpp b/src/shapes/rectangle.cpp
index 2be94648..694d136d 100644
--- a/src/shapes/rectangle.cpp
+++ b/src/shapes/rectangle.cpp
@@ -105,7 +105,7 @@ public:
 		m_frame = Frame(normalize(m_dpdu), normalize(m_dpdv), normal);
 
 		m_invSurfaceArea = 1.0f / getSurfaceArea();
-		if (std::abs(dot(m_dpdu, m_dpdv)) > Epsilon)
+		if (std::abs(dot(normalize(m_dpdu), normalize(m_dpdv))) > Epsilon)
 			Log(EError, "Error: 'toWorld' transformation contains shear!");
 	}
 

From 6f81b5623b72ff75d687e2264a6716ab52e4b39e Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 18 Oct 2013 09:29:28 +0200
Subject: [PATCH 42/43] python documentation fix

---
 doc/python.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/python.tex b/doc/python.tex
index 43227b7d..6627e035 100644
--- a/doc/python.tex
+++ b/doc/python.tex
@@ -153,7 +153,7 @@ scheduler = Scheduler.getInstance()
 
 # Start up the scheduling system with one worker per local core
 for i in range(0, multiprocessing.cpu_count()):
-	scheduler.registerWorker(LocalWorker('wrk%i' % i))
+	scheduler.registerWorker(LocalWorker(i, 'wrk%i' % i))
 scheduler.start()
 
 # Create a queue for tracking render jobs

From 480e3eb9d50d2af763fe0f40c9e629207209af1b Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel@cs.cornell.edu>
Date: Fri, 18 Oct 2013 11:25:47 +0200
Subject: [PATCH 43/43] render settings dialog: always preserve the crop window

---
 src/librender/scenehandler.cpp   | 3 +--
 src/mtsgui/rendersettingsdlg.cpp | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/librender/scenehandler.cpp b/src/librender/scenehandler.cpp
index c41a7ae9..8c27d3bb 100644
--- a/src/librender/scenehandler.cpp
+++ b/src/librender/scenehandler.cpp
@@ -102,11 +102,10 @@ SceneHandler::SceneHandler(const SAXParser *parser,
 	m_tags["blackbody"]  = TagEntry(EBlackBody,  (Class *) NULL);
 	m_tags["spectrum"]   = TagEntry(ESpectrum,   (Class *) NULL);
 	m_tags["transform"]  = TagEntry(ETransform,  (Class *) NULL);
-	m_tags["animation"] = TagEntry(EAnimation, (Class *) NULL);
+	m_tags["animation"]  = TagEntry(EAnimation, (Class *) NULL);
 	m_tags["include"]    = TagEntry(EInclude,    (Class *) NULL);
 	m_tags["alias"]      = TagEntry(EAlias,      (Class *) NULL);
 
-
 	XMLTransService::Codes failReason;
 	m_transcoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(
 			"UTF-8", failReason, TRANSCODE_BLOCKSIZE);
diff --git a/src/mtsgui/rendersettingsdlg.cpp b/src/mtsgui/rendersettingsdlg.cpp
index 127a75c7..d8b145fb 100644
--- a/src/mtsgui/rendersettingsdlg.cpp
+++ b/src/mtsgui/rendersettingsdlg.cpp
@@ -385,7 +385,7 @@ void RenderSettingsDialog::apply(SceneContext *ctx) {
 	filmProps.setInteger("width", size.x, false);
 	filmProps.setInteger("height", size.y, false);
 
-	if (size.x != cropSize.x || size.y != cropSize.y) {
+	if (size.x != cropSize.x || size.y != cropSize.y || cropOffset.x != 0 || cropOffset.y != 0) {
 		filmProps.setInteger("cropWidth", cropSize.x, false);
 		filmProps.setInteger("cropHeight", cropSize.y, false);
 		filmProps.setInteger("cropOffsetX", cropOffset.x, false);