Description
I captured RGB-D data and camera poses from Unity, then generated point clouds from the depth images. When I attempted to align the point clouds in a global coordinate frame using the camera poses for 3D reconstruction, the output was misaligned or distorted. Despite debugging, I couldn’t identify the root cause. A different approach also yielded unsatisfactory results.
Steps to reproduce the behavior:
- captured RGB-D and camera poses
`using UnityEngine;
using UnityEngine.Rendering.Universal;
using System.Collections;
using System.IO;
public class RGBDCapture : MonoBehaviour
{
public Camera cam;
private UniversalAdditionalCameraData cameraData;
private RenderTexture rgbRT;
private RenderTexture depthRT;
private int width = 640;
private int height = 480;
public int totalFrames = 30; // 你要采集的帧数
public float interval = 1.5f; // 每帧间隔秒数
private int currentFrame = 0;
private Quaternion? lastRotation = null;
private bool isCapturing = false;
void Start()
{
if (cam == null)
{
Debug.LogError("Camera not assigned!");
return;
}
cameraData = cam.GetComponent<UniversalAdditionalCameraData>();
if (cameraData == null)
{
Debug.LogWarning("No UniversalAdditionalCameraData found on camera. Adding one.");
cameraData = cam.gameObject.AddComponent<UniversalAdditionalCameraData>();
}
rgbRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
depthRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
}
[ContextMenu("Start RGB-D Capture Sequence")]
public void StartCaptureSequence()
{
if (!isCapturing)
{
currentFrame = 0;
StartCoroutine(CaptureSequence());
}
}
IEnumerator CaptureSequence()
{
yield return new WaitForEndOfFrame();
isCapturing = true;
while (currentFrame < totalFrames)
{
Quaternion currentRotation = cam.transform.rotation;
if (lastRotation.HasValue)
{
Quaternion delta = Quaternion.Inverse(lastRotation.Value) * currentRotation;
float angle = Quaternion.Angle(Quaternion.identity, delta);
Debug.Log($"[Frame {currentFrame}] Delta Rotation Angle = {angle:F4}°");
}
lastRotation = currentRotation;
string rgbName = $"rgb_{currentFrame:D4}.png";
string depthName = $"depth_{currentFrame:D4}.exr";
string poseName = $"pose_{currentFrame:D4}.txt";
// 🟡 先保存 Pose(图像采样前)
SaveCameraPose(poseName);
// --- Capture RGB ---
cameraData.SetRenderer(0); // RGB Renderer
cam.targetTexture = rgbRT;
cam.Render();
SaveRenderTextureToPNG(rgbRT, rgbName);
// --- Capture Depth ---
cameraData.SetRenderer(1); // Depth Renderer
cam.targetTexture = depthRT;
cam.Render();
SaveRenderTextureToEXR(depthRT, depthName);
cam.targetTexture = null;
Debug.Log($"Captured frame {currentFrame}");
currentFrame++;
yield return new WaitForSeconds(interval);
}
isCapturing = false;
Debug.Log("RGB-D sequence capture complete.");
}
void SaveRenderTextureToPNG(RenderTexture rt, string fileName)
{
string folder = Application.dataPath + "/Captured/";
if (!Directory.Exists(folder))
Directory.CreateDirectory(folder);
string path = Path.Combine(folder, fileName);
RenderTexture.active = rt;
Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGB24, false);
tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
tex.Apply();
byte[] bytes = tex.EncodeToPNG();
File.WriteAllBytes(path, bytes);
RenderTexture.active = null;
}
void SaveRenderTextureToEXR(RenderTexture rt, string fileName)
{
string folder = Application.dataPath + "/Captured/";
if (!Directory.Exists(folder))
Directory.CreateDirectory(folder);
string path = Path.Combine(folder, fileName);
RenderTexture.active = rt;
Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGBAFloat, false);
tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
tex.Apply();
byte[] bytes = tex.EncodeToEXR(Texture2D.EXRFlags.OutputAsFloat);
File.WriteAllBytes(path, bytes);
RenderTexture.active = null;
}
void SaveCameraPose(string fileName)
{
string folder = Application.dataPath + "/Captured/";
if (!Directory.Exists(folder))
Directory.CreateDirectory(folder);
string path = Path.Combine(folder, fileName);
// 获取相机的位姿(世界坐标系中的 4x4 矩阵)
// Matrix4x4 m = cam.transform.localToWorldMatrix;
Matrix4x4 m = Matrix4x4.TRS(cam.transform.position, cam.transform.rotation, Vector3.one);
using (StreamWriter writer = new StreamWriter(path))
{
for (int i = 0; i < 4; i++)
{
string line = $"{m[i, 0]} {m[i, 1]} {m[i, 2]} {m[i, 3]}";
writer.WriteLine(line);
}
}
}
}
2.generated point cloud and align them
import os
import cv2
import numpy as np
import open3d as o3d
import OpenEXR
import Imath
def read_exr_depth(exr_path):
exr_file = OpenEXR.InputFile(exr_path)
dw = exr_file.header()['dataWindow']
width = dw.max.x - dw.min.x + 1
height = dw.max.y - dw.min.y + 1
pt = Imath.PixelType(Imath.PixelType.FLOAT)
depth_str = exr_file.channel('R', pt)
depth = np.frombuffer(depth_str, dtype=np.float32).reshape((height, width))
return depth
def unity_to_opencv_coords(points):
points[:, 1] *= -1
return points
def convert_unity_pose_to_opencv(pose_unity):
R_u2cv = np.diag([1, -1, -1]) # Unity → OpenCV
T_u2cv = np.eye(4)
T_u2cv[:3, :3] = R_u2cv
# Apply similarity transform: T * P * T⁻¹
pose_cv = T_u2cv @ pose_unity @ np.linalg.inv(T_u2cv)
return pose_cv
def load_pose(path):
return np.loadtxt(path)
def create_pointcloud(rgb_img, depth_img, fx, fy, cx, cy):
h, w = depth_img.shape
i, j = np.meshgrid(np.arange(w), np.arange(h))
z = depth_img.copy()
z[z >= 0.99] = 0
x = (i - cx) * z / fx
y = (j - cy) * z / fy
points = np.stack((x, y, z), axis=-1).reshape(-1, 3)
colors = rgb_img[:, :, :3].reshape(-1, 3) / 255.0 # BGR to RGB
return points, colors
def transform_points(points, pose):
points_hom = np.hstack((points, np.ones((points.shape[0], 1))))
# pose = np.linalg.inv(pose)
points_world = (pose @ points_hom.T).T[:, :3]
return points_world
fx, fy, cx, cy = 1598.515 , 865.4121, 320, 240
data_dir = "/home/jimmy/task/Unity_Projects/Terrain/Assets/Captured"
num_frames = 10
all_points = []
all_colors = []
pose_lists = []
for i in range(20):
rgb_path = os.path.join(data_dir, f"rgb_{i:04d}.png")
depth_path = os.path.join(data_dir, f"depth_{i:04d}.exr")
pose_path = os.path.join(data_dir, f"pose_{i:04d}.txt")
rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1] # BGR -> RGB
depth = read_exr_depth(depth_path)
pose = load_pose(pose_path)
pose = convert_unity_pose_to_opencv(pose)
# print(f"pose = {pose}")
T_center = np.zeros(3)
# T_center[:3, 3] = -pose[:3, 3]
pose[:3,3] = T_center
pose_rotation = pose[:3,:3]
pose[:3,:3] = pose_rotation
# print(f"pose = {pose}")
points, colors = create_pointcloud(rgb, depth, fx, fy, cx, cy)
points = unity_to_opencv_coords(points)
points_world = transform_points(points, pose)
coord_pose = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
coord_pose.transform(pose)
all_points.append(points_world)
# all_points.append(points)
all_colors.append(colors)
pose_lists.append(coord_pose)
all_points = np.vstack(all_points)
all_colors = np.vstack(all_colors)
coord = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
coord.transform(pose)
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(all_points)
pcd.colors = o3d.utility.Vector3dVector(all_colors)
o3d.visualization.draw_geometries([*pose_lists])
o3d.visualization.draw_geometries([pcd, coord])
`
3. misaligned picture
Unity version is 2022.03.53
I look forward to your response. Thank you for your help!