# mitsuba-visualize/cameras.py
import sys
import argparse
from pathlib import Path
from typing import List
import math
import numpy as np
import xml.etree.ElementTree as etree
sys.path.append('')  # Ensure the current working directory is on the import path
import set_python_path
from mitsuba.core import Transform, Matrix4x4, Vector3, PluginManager, normalize, AnimatedTransform, Point3
from mitsuba.render import Sensor
import util
from math_util.catmull_rom_spline import catmull_rom_spline
from math_util.bezier_curve import bezier_curve


def convert_meshlab2mitsuba_camera(camera_def: dict) -> Transform:
    """
    Takes a meshlab camera dict (usually loaded from a meshlab xml file) and turns it into a valid Mitsuba Transform
    which can then be applied to a sensor
    :param camera_def: The camera definition dict, containing at least the keys RotationMatrix and TranslationVector
    :return: A Mitsuba transform constructed from the given values
    """
    # The meshlab camera matrix is transposed
    matrix = Matrix4x4([
        float(elem) for elem in camera_def['RotationMatrix'].split(' ')[:16]
    ]).transpose()
    # Add the (negated) translation vector
    translation = [-float(elem) for elem in camera_def['TranslationVector'].split(' ')]
    for i in range(3):
        matrix[i, 3] = translation[i]
    # Make a Mitsuba transform and flip the rotation signs (y is not flipped, otherwise the resulting image is flipped vertically)
    transform = Transform(matrix)
    transform *= transform.scale(Vector3(-1, 1, -1))
    return transform
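
# Usage sketch (hedged; the dict keys mirror the attributes of a meshlab <VCGCamera>
# node, with an identity rotation and a hypothetical translation for illustration):
#
#   camera_def = {
#       'RotationMatrix': '1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1',
#       'TranslationVector': '0 0 -5 1',
#   }
#   to_world = convert_meshlab2mitsuba_camera(camera_def)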


def read_meshlab_sensor_transforms(file_path: Path) -> List[Transform]:
    """
    Reads meshlab camera properties from an xml file. To obtain those, simply press ctrl/cmd+c in meshlab and paste into a text editor.
    If you paste multiple cameras into one file, make sure to have a valid document structure with one root node, e.g.
    <!DOCTYPE ViewState>
    <project>
        <VCGCamera ... />
        <ViewSettings ... />
    </project>
    :param file_path: Path to the xml file to read
    :return: A list of Mitsuba transforms
    """
    assert file_path.suffix == '.xml'
    root_node = etree.parse(file_path).getroot()
    transforms = [convert_meshlab2mitsuba_camera(elem.attrib) for elem in root_node if elem.tag == 'VCGCamera']
    return transforms
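
# Example (sketch; 'copied_cameras.xml' is a hypothetical file created by pasting
# meshlab views as described in the docstring above):
#
#   key_transforms = read_meshlab_sensor_transforms(Path('copied_cameras.xml'))
#   sensors = [create_sensor_from_transform(t, fov=60.) for t in key_transforms]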


def create_transform_on_bbsphere(bbox, radius_multiplier: float, positioning_vector: Vector3, tilt: Transform = None) -> Transform:
    """
    Create a camera with its origin on the bounding sphere around an object, looking towards the center of that sphere.
    The camera center is calculated as: bbsphere_center + radius_multiplier * bbsphere_radius * normalized_positioning_vector
    :param bbox: The bounding box of the object from which to calculate the bounding sphere
    :param radius_multiplier: The value to multiply the bounding sphere's radius with
        (= distance of the camera origin to the center of the bounding sphere)
    :param positioning_vector: The vector pointing towards the camera center
    :param tilt: Transform to apply to the camera origin, usually a small rotation to tilt the camera perspective a little
    :return: A transform on the bbsphere
    """
    bsphere = bbox.getBSphere()
    camera_target = bsphere.center
    camera_offset = radius_multiplier * bsphere.radius * normalize(positioning_vector)
    camera_origin = camera_target + camera_offset
    camera_transform = Transform.lookAt(tilt * camera_origin if tilt is not None else camera_origin,
                                        camera_target, Vector3(0., 0., 1.))
    return camera_transform
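
# Example (sketch, assuming `scene` is a loaded Mitsuba scene whose getAABB()
# yields the bounding box): place the camera along +x at twice the bounding
# sphere radius, looking at the scene center with z up.
#
#   to_world = create_transform_on_bbsphere(scene.getAABB(), radius_multiplier=2.,
#                                           positioning_vector=Vector3(1., 0., 0.))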


def create_trajectory_on_bbsphere(bbox, initial_positioning_vector: Vector3, rotation_around: Vector3, num_cameras: int,
                                  radius_multiplier: float, tilt: Transform = None) -> List[Transform]:
    """
    Creates a circular trajectory on the bounding sphere of a scene/object by rotating the camera in fixed angular steps
    :param bbox: The bounding box of the object/scene to render
    :param initial_positioning_vector: Controls where to position the camera initially
    :param rotation_around: The axis to rotate the camera around
    :param num_cameras: Number of cameras to generate
    :param radius_multiplier: Cameras will be placed at distance multiplier * bbsphere radius
    :param tilt: Additional transformation meant to tilt the camera a bit
    :return: A list of generated transforms
    """
    # Apply the optional tilt once to the initial positioning vector
    positioning_vector = tilt * initial_positioning_vector if tilt is not None else initial_positioning_vector
    transforms = []
    step_angle = 360. / float(num_cameras)
    for camera_idx in range(num_cameras):
        transform = create_transform_on_bbsphere(bbox, radius_multiplier,
                                                 Transform.rotate(rotation_around, step_angle * camera_idx) * positioning_vector)
        transforms.append(transform)
    return transforms
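
# Example (sketch): a full 120-camera turntable orbit around the z axis, starting
# from the +x direction (`scene` is again an assumed, already-loaded Mitsuba scene):
#
#   orbit = create_trajectory_on_bbsphere(scene.getAABB(), Vector3(1., 0., 0.),
#                                         Vector3(0., 0., 1.), num_cameras=120,
#                                         radius_multiplier=2.5)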


def create_interpolated_trajectory(cameras: List[Transform], method: str,
                                   num_cameras_per_m: int = 75, num_cameras_per_rad: int = 225,
                                   num_total_cameras: int = 1000) -> List[Transform]:
    """
    Creates an interpolated camera trajectory from the supplied key camera transforms
    :param cameras: Camera transformations to use as control points for the interpolation
    :param method: How to interpolate: *bezier* (used for both camera centers and rotations):
        Smooth trajectory, but does not necessarily pass through the control points (except the first and last), or
        *catmullrom* (used for camera centers, with quaternion slerp for rotations):
        Passes through all control points, but needs more tuning to prevent weird behaviour
    :param num_cameras_per_m: catmullrom parameter: Camera centers per meter
    :param num_cameras_per_rad: catmullrom parameter: Camera rotations per radian
    :param num_total_cameras: bezier parameter: Number of interpolated cameras between the first and last key camera pose
    :return: A list of interpolated transforms
    """
    all_interpolated = []
    camera_pose_matrices = [util.convert_transform2numpy(camera) for camera in cameras]
    if method == 'bezier':
        interpolated_cameras = bezier_curve(camera_pose_matrices, num_steps=num_total_cameras)
        for elem in interpolated_cameras:
            position = Point3(*elem[:3, 3].tolist())
            look = Vector3(*elem[:3, :3].dot(np.array([0, 0, 1])).tolist())
            up = Vector3(*elem[:3, :3].dot(np.array([0, 1, 0])).tolist())
            all_interpolated.append(Transform.lookAt(position, position + look, up))
    elif method == 'catmullrom':
        assert len(cameras) >= 4, "Catmull-Rom interpolation needs at least 4 key cameras"
        # Sliding window of 4 key cameras; each spline segment interpolates between the two middle ones
        camera_groups = [camera_pose_matrices[idx: idx + 4] for idx in range(len(cameras) - 3)]
        for camera_group in camera_groups:
            key_positions = (camera_group[1][:3, 3], camera_group[2][:3, 3])
            # The look directions are the rotated z axes of the two middle key cameras
            key_looks = (camera_group[1][:3, :3].dot(np.array([0., 0., 1.])),
                         camera_group[2][:3, :3].dot(np.array([0., 0., 1.])))
            dist = np.linalg.norm(key_positions[1] - key_positions[0])
            angle = math.acos(np.clip(key_looks[0].dot(key_looks[1]), -1., 1.))
            # Sample densely enough for both the translational and the rotational distance
            # (the division by 100 presumably converts centimeter scene units to meters)
            num_t = int(np.round(dist / 100. * num_cameras_per_m))
            num_r = int(np.round(angle * num_cameras_per_rad))
            num = max(40, num_t, num_r)
            interpolated_cameras = catmull_rom_spline(camera_group, num)
            for elem in interpolated_cameras:
                position = Point3(*elem[:3, 3].tolist())
                look = Vector3(*elem[:3, :3].dot(np.array([0, 0, 1])).tolist())
                up = Vector3(*elem[:3, :3].dot(np.array([0, 1, 0])).tolist())
                all_interpolated.append(Transform.lookAt(position, position + look, up))
    else:
        raise NotImplementedError(f'The method you chose ({method}) is not implemented')
    return all_interpolated
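
# Example (sketch): densify key cameras read from a hypothetical meshlab file into
# a smooth bezier trajectory; 'catmullrom' would need at least 4 key cameras.
#
#   keys = read_meshlab_sensor_transforms(Path('copied_cameras.xml'))
#   trajectory = create_interpolated_trajectory(keys, method='bezier', num_total_cameras=500)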


def create_animated_sensors(trajectory: List[Transform], shutter_time: float = 1., width: int = 1920, height: int = 1440,
                            fov: float = 45., num_samples: int = 256) -> List[Sensor]:
    """
    Create one animated sensor per trajectory transform (applies motion blur to the rendered images)
    :param trajectory: The trajectory containing all transforms
    :param shutter_time: The shutter time, passed to Mitsuba's setShutterOpenTime
    :param width: Width of the generated image
    :param height: Height of the generated image
    :param fov: The sensor field of view
    :param num_samples: Number of samples per pixel (controls the noise in the resulting image)
    :return: A list of Mitsuba animated sensors
    """
    animated_sensors = []
    for transform_idx in range(len(trajectory)):
        # Blend from the current pose towards the next one; the last sensor keeps its pose
        atrafo = AnimatedTransform()
        atrafo.appendTransform(0, trajectory[transform_idx])
        atrafo.appendTransform(1, trajectory[min(transform_idx + 1, len(trajectory) - 1)])
        atrafo.sortAndSimplify()
        sensor = create_sensor_from_transform(atrafo, width, height, fov, num_samples)
        sensor.setShutterOpen(0)
        sensor.setShutterOpenTime(shutter_time)
        animated_sensors.append(sensor)
    return animated_sensors
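
# Example (sketch): turn an interpolated trajectory into motion-blurred sensors.
# A shutter_time of 1. blurs over the full step towards the next pose; smaller
# values reduce the blur.
#
#   sensors = create_animated_sensors(trajectory, shutter_time=0.5)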


def create_sensor_from_transform(transform, width=1920, height=1440, fov=45., num_samples=256) -> Sensor:
    """
    Create a Mitsuba sensor from a camera transform
    :param transform: The transform (camera to world) that this sensor uses
    :param width: The width of the resulting image
    :param height: The height of the resulting image
    :param fov: The field of view in degrees (default 45, meshlab uses 60)
    :param num_samples: Number of samples per pixel
    :return: A Mitsuba sensor
    """
    sensor = PluginManager.getInstance().create({
        'type': 'perspective',
        'film': {
            'type': 'ldrfilm',
            'width': width,
            'height': height,
            'pixelFormat': 'rgba',
            'exposure': 1.0,
            'banner': False
        },
        'sampler': {
            'type': 'ldsampler',
            'sampleCount': num_samples
        },
        'toWorld': transform,
        'fov': fov,
    })
    return sensor
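
# Example (sketch): a single static sensor matching meshlab's default 60 degree
# field of view, using the first transform parsed above:
#
#   sensor = create_sensor_from_transform(transforms[0], width=1280, height=960, fov=60.)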


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Load and parse cameras from different formats for mitsuba. Called directly, this will print the parsed mitsuba cameras")
    parser.add_argument('camera_filename', type=str, help="The file containing the camera definitions")
    parser.add_argument('--type', default='meshlab', choices=['meshlab'], help="Which type the camera definitions are")
    parser.add_argument('--interpolate', action='store_true',
                        help="Whether to interpolate cameras between the given ones (needs at least 4 cameras in this case)")
    args = parser.parse_args()

    camera_filepath = Path(args.camera_filename)
    assert camera_filepath.is_file(), "Camera file has to exist"
    if args.type == 'meshlab':
        transforms = read_meshlab_sensor_transforms(camera_filepath)
    else:
        raise NotImplementedError
    print(f"Read {len(transforms)} camera transformations from type {args.type}:")
    print(transforms)

    if args.interpolate:
        interpolated_cameras = create_interpolated_trajectory(transforms, method='bezier')
        # Export the interpolated camera centers (the translation part of each pose) as a point cloud for inspection
        points = np.array([util.convert_transform2numpy(elem)[:3, 3] for elem in interpolated_cameras])
        from io_util import ply
        ply.write_ply('/home/christian/tmp/cameras.ply', points=points, as_text=True)
        print(f"Interpolated {len(interpolated_cameras)} camera transformations:")
        print(interpolated_cameras)