mitsuba-visualize/cameras.py

248 lines
11 KiB
Python

import sys
import argparse
from pathlib import Path
from typing import List
import math
import numpy as np
import xml.etree.ElementTree as etree
sys.path.append('')
import set_python_path
from mitsuba.core import Transform, Matrix4x4, Vector3, PluginManager, normalize, AnimatedTransform, Point3
from mitsuba.render import Sensor
import util
from math_util.catmull_rom_spline import catmull_rom_spline
from math_util.bezier_curve import bezier_curve
def convert_meshlab2mitsuba_camera(camera_def: dict) -> Transform:
"""
Takes a meshlab camera dict (usually loaded from a meshlab xml file) and turns it into a valid mitsuba Transform
which can then be applied to a sensor
:param camera_def: The camera def dict, containing at least keys RotationMatrix and TranslationVector
:return: A mitsuba transform constructed from the given values
"""
# Meshlab camera matrix is transposed
matrix = Matrix4x4([
float(elem) for elem in camera_def['RotationMatrix'].split(' ')[:16]
]).transpose()
# Add translation vector
translation = [-float(elem) for elem in camera_def['TranslationVector'].split(' ')]
for i in range(3):
matrix[i, 3] = translation[i]
# Make Mitsuba transform and flip rotation signs (y is not flipped, otherwise resulting image will be flipped vertically)
transform = Transform(matrix)
transform *= transform.scale(Vector3(-1, 1, -1))
return transform
def read_meshlab_sensor_transforms(file_path: Path) -> List[Transform]:
"""
Reads meshlab camera properties from an xml file. To obtain those, simply press ctrl/cmd+c in meshlab, and paste into a text editor.
If you paste multiple cameras into one file, make sure to have a valid document structure with one root node, e.g.
<!DOCTYPE ViewState>
<project>
<VCGCamera ... />
<ViewSettings ... />
</project>
:param file_path: Path to the xml file to read
:return: A list of Mitsuba transforms
"""
assert file_path.suffix == '.xml'
root_node = etree.parse(file_path).getroot()
transforms = [convert_meshlab2mitsuba_camera(elem.attrib) for elem in root_node if elem.tag == 'VCGCamera']
return transforms
def create_transform_on_bbsphere(bbox, radius_multiplier: float, positioning_vector: Vector3, tilt=None) -> Transform:
"""
Create a camera with origin on the bounding sphere around an object and looking towards the center of that sphere
Camera center is calculated as: bbsphere_center + radius_multiplier * bbsphere_radius * normalized_positioning_vector
:param bbox: The bounding box of the object from which to calculate the bounding sphere
:param radius_multiplier: The value to multiply the bounding sphere's radius with
(= distance of the camera origin to the center of the bounding sphere)
:param positioning_vector: The vector pointing towards the camera center
:param tilt: Transform to apply to camera origin. Usually a small rotation to tilt the camera perspective a little
:return: A transform on the bbsphere
"""
bsphere = bbox.getBSphere()
camera_target = bsphere.center
camera_offset = radius_multiplier * bsphere.radius * normalize(positioning_vector)
camera_origin = camera_target + camera_offset
camera_transform = Transform.lookAt(tilt * camera_origin if tilt is not None else camera_origin, camera_target, Vector3(0., 0., 1.))
return camera_transform
def create_trajectory_on_bbsphere(bbox, initial_positioning_vector: Vector3, rotation_around: Vector3, num_cameras: int,
radius_multiplier: float, tilt: Transform = None) -> List[Transform]:
"""
Creates an interpolated trajectory on the bounding sphere of a scene/object
:param bbox: The bounding box of the object/scene to render
:param initial_positioning_vector: Controls where to position the camera initially
:param rotation_around: Specify axis to rotate camera around
:param num_cameras: Number of cameras to generate
:param radius_multiplier: Cameras will be placed at distance multiplier * bbsphere radius
:param tilt: Additional transformation meant to tilt the camera a bit
:return: List with generated transforms
"""
transforms = []
step_angle = 360. / float(num_cameras)
for camera_idx in range(num_cameras):
transform = create_transform_on_bbsphere(bbox, radius_multiplier, Transform.rotate(rotation_around, step_angle * camera_idx) * tilt * initial_positioning_vector)
transforms.append(transform)
return transforms
def create_interpolated_trajectory(cameras: List[Transform], method: str,
num_cameras_per_m: int = 75, num_cameras_per_rad: int = 225,
num_total_cameras: int = 1000) -> List[Transform]:
"""
Creates an interpolated camera trajectory using supplied key camera transforms
:param cameras: Camera transformations to use as control points for interpolation
:param method: How to interpolate: *bezier* (used for both camera centers and rotations):
Smooth trajectory, but does not necessarily pass through the control points (except the first and last) or
*catmullrom* (used for camera centers, using quaternion slerp for rotations):
Passes through all control points, but needs more tuning to prevent weird behaviour
:param num_cameras_per_m: catmullrom parameter: Camera centers per meter
:param num_cameras_per_rad: catmullrom parameter: Camera rotations per radiant
:param num_total_cameras: bezier parameter: Number of interpolated cameras between first and last key camera pose
:return: A list of interpolated transforms
"""
all_interpolated = []
camera_pose_matrices = [util.convert_transform2numpy(camera) for camera in cameras]
if method == 'bezier':
interpolated_cameras = bezier_curve(camera_pose_matrices, num_steps=num_total_cameras)
for elem in interpolated_cameras:
position = Point3(*elem[:3, 3].tolist())
look = Vector3(*elem[:3, :3].dot(np.array([0, 0, 1])).tolist())
up = Vector3(*(elem[:3, :3].dot(np.array([0, 1, 0]))).tolist())
all_interpolated.append(Transform.lookAt(position, position + look, up))
elif method == 'catmullrom':
assert len(cameras) >= 4
camera_groups = [camera_pose_matrices[idx: idx + 4] for idx in range(len(cameras) - 3)]
for camera_group in camera_groups:
key_positions = (camera_group[1][:3, 3], camera_group[2][:3, 3])
key_looks = (camera_group[1][:3, :3] * np.array([0, 0, 1]), camera_group[2][:3, :3] * np.array([0, 0, 1]))
dist = np.linalg.norm(key_positions[1] - key_positions[0])
angle = math.acos(np.clip(np.sum(key_looks[1] @ key_looks[0]), -1., 1.))
num_t = int(np.round(dist / 100. * num_cameras_per_m))
num_r = int(np.round(angle * num_cameras_per_rad))
num = max(40, max(num_t, num_r))
interpolated_cameras = catmull_rom_spline(camera_group, num)
for elem in interpolated_cameras:
position = Point3(*elem[:3, 3].tolist())
look = Vector3(*elem[:3, :3].dot(np.array([0, 0, 1])).tolist())
up = Vector3(*(elem[:3, :3].dot(np.array([0, 1, 0]))).tolist())
all_interpolated.append(Transform.lookAt(position, position + look, up))
else:
raise NotImplementedError(f'The method you chose ({method}) is not implemented')
return all_interpolated
def create_animated_sensors(trajectory: List[Transform], shutter_time: float = 1., width: int = 1920, height: int = 1440,
fov: float = 45., num_samples: int = 256) -> List[Sensor]:
"""
Create an animated sensor (Applies motion blur to the rendered image)
:param trajectory: The trajectory containing all Transforms
:param shutter_time: The shutter time to be used to set the Mitsuba setShutterOpenTime
:param width: Width of the generated image
:param height: Height of the generated image
:param fov: The sensor field of view
:param num_samples: Number of samples per pixel (controls the noise in the resulting image)
:return: A list of Mitsuba animated sensors
"""
animated_sensors = []
for transform_idx in range(len(trajectory)):
atrafo = AnimatedTransform()
atrafo.appendTransform(0, trajectory[transform_idx])
atrafo.appendTransform(1, trajectory[min(transform_idx + 1, len(trajectory) - 1)])
atrafo.sortAndSimplify()
sensor = create_sensor_from_transform(atrafo, width, height, fov, num_samples)
sensor.setShutterOpen(0)
sensor.setShutterOpenTime(shutter_time)
animated_sensors.append(sensor)
return animated_sensors
def create_sensor_from_transform(transform, width=1920, height=1440, fov=45., num_samples=256) -> Sensor:
"""
Create a Mitsuba sensor from a camera transform
:param transform: The transform (camera to world) that this sensor uses
:param width: The width of the resulting image
:param height: The height of the resulting image
:param fov: The field of view in degrees (default 45, meshlab uses 60)
:param num_samples: Number of samples per pixel
:return: A Mitsuba sensor
"""
sensor = PluginManager.getInstance().create({
'type': 'perspective',
'film': {
'type': 'ldrfilm',
'width': width,
'height': height,
'pixelFormat': 'rgba',
'exposure': 1.0,
'banner': False
},
'sampler': {
'type': 'ldsampler',
'sampleCount': num_samples
},
'toWorld': transform,
'fov': fov,
})
return sensor
if __name__ == '__main__':
parser = argparse.ArgumentParser("Load and parse cameras from different formats for mitsuba. Called directly, it will print the parsed mitsuba camera")
parser.add_argument('camera_filename', type=str, help="The file containing camera definitions")
parser.add_argument('--type', default='meshlab', choices=['meshlab'], help="Which type the camera definitions are")
parser.add_argument('--interpolate', action='store_true', help="Whether to interpolate cameras between the given one (need at least 4 in this case)")
args = parser.parse_args()
camera_filepath = Path(args.camera_filename)
assert camera_filepath.is_file(), "Camera file has to exist"
if args.type == 'meshlab':
transforms = read_meshlab_sensor_transforms(camera_filepath)
else:
raise NotImplementedError
print(f"Read {len(transforms)} camera transformations from type {args.type}:")
print(transforms)
if args.interpolate:
interpolated_cameras = create_interpolated_trajectory(transforms, method='bezier')
points = np.array([elem[3, :3] for elem in interpolated_cameras])
from io_util import ply
ply.write_ply('/home/christian/tmp/cameras.ply', points=points, as_text=True)
print(f"Interpolated {len(interpolated_cameras)} camera transformations:")
print(interpolated_cameras)