I encountered a strange problem regarding the reconstruction of a 3D point cloud in Unity.

I captured RGB-D data and camera poses from Unity, then generated point clouds from the depth images. When I attempted to align the point clouds in a global coordinate frame using the camera poses for 3D reconstruction, the output was misaligned or distorted. Despite debugging, I couldn’t identify the root cause. A different approach also yielded unsatisfactory results.

Steps to reproduce the behavior:
1. captured RGB-D and camera poses
`using UnityEngine;
using UnityEngine.Rendering.Universal;
using System.Collections;
using System.IO;

public class RGBDCapture : MonoBehaviour
{
    public Camera cam;
    private UniversalAdditionalCameraData cameraData;

    private RenderTexture rgbRT;
    private RenderTexture depthRT;

    private int width = 640;
    private int height = 480;

    public int totalFrames = 30;       // 你要采集的帧数
    public float interval = 1.5f;      // 每帧间隔秒数
    private int currentFrame = 0;
    private Quaternion? lastRotation = null;
    private bool isCapturing = false;

    void Start()
    {
        if (cam == null)
        {
            Debug.LogError("Camera not assigned!");
            return;
        }

        cameraData = cam.GetComponent<UniversalAdditionalCameraData>();
        if (cameraData == null)
        {
            Debug.LogWarning("No UniversalAdditionalCameraData found on camera. Adding one.");
            cameraData = cam.gameObject.AddComponent<UniversalAdditionalCameraData>();
        }

        rgbRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
        depthRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
    }

    [ContextMenu("Start RGB-D Capture Sequence")]
    public void StartCaptureSequence()
    {
        if (!isCapturing)
        {
            currentFrame = 0;
            StartCoroutine(CaptureSequence());
        }
    }

    IEnumerator CaptureSequence()
    {
        yield return new WaitForEndOfFrame();
        isCapturing = true;


        while (currentFrame < totalFrames)
        {
             Quaternion currentRotation = cam.transform.rotation;

            if (lastRotation.HasValue)
            {
                Quaternion delta = Quaternion.Inverse(lastRotation.Value) * currentRotation;
                float angle = Quaternion.Angle(Quaternion.identity, delta);
                Debug.Log($"[Frame {currentFrame}] Delta Rotation Angle = {angle:F4}°");
            }

            lastRotation = currentRotation;
            
            string rgbName = $"rgb_{currentFrame:D4}.png";
            string depthName = $"depth_{currentFrame:D4}.exr";
            string poseName = $"pose_{currentFrame:D4}.txt";

            // 🟡 先保存 Pose（图像采样前）
            SaveCameraPose(poseName);

            // --- Capture RGB ---
            cameraData.SetRenderer(0); // RGB Renderer
            cam.targetTexture = rgbRT;
            cam.Render();
            SaveRenderTextureToPNG(rgbRT, rgbName);

            // --- Capture Depth ---
            cameraData.SetRenderer(1); // Depth Renderer
            cam.targetTexture = depthRT;
            cam.Render();
            SaveRenderTextureToEXR(depthRT, depthName);

            cam.targetTexture = null;
            Debug.Log($"Captured frame {currentFrame}");

            currentFrame++;
            yield return new WaitForSeconds(interval);
        }

        isCapturing = false;
        Debug.Log("RGB-D sequence capture complete.");
    }

    void SaveRenderTextureToPNG(RenderTexture rt, string fileName)
    {
        string folder = Application.dataPath + "/Captured/";
        if (!Directory.Exists(folder))
            Directory.CreateDirectory(folder);

        string path = Path.Combine(folder, fileName);
        RenderTexture.active = rt;
        Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGB24, false);
        tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
        tex.Apply();

        byte[] bytes = tex.EncodeToPNG();
        File.WriteAllBytes(path, bytes);

        RenderTexture.active = null;
    }

    void SaveRenderTextureToEXR(RenderTexture rt, string fileName)
    {
        string folder = Application.dataPath + "/Captured/";
        if (!Directory.Exists(folder))
            Directory.CreateDirectory(folder);

        string path = Path.Combine(folder, fileName);

        RenderTexture.active = rt;

        Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGBAFloat, false);
        tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
        tex.Apply();

        byte[] bytes = tex.EncodeToEXR(Texture2D.EXRFlags.OutputAsFloat);
        File.WriteAllBytes(path, bytes);

        RenderTexture.active = null;
    }

    void SaveCameraPose(string fileName)
    {
        string folder = Application.dataPath + "/Captured/";
        if (!Directory.Exists(folder))
            Directory.CreateDirectory(folder);

        string path = Path.Combine(folder, fileName);

        // 获取相机的位姿（世界坐标系中的 4x4 矩阵）
        // Matrix4x4 m = cam.transform.localToWorldMatrix;
        Matrix4x4 m = Matrix4x4.TRS(cam.transform.position, cam.transform.rotation, Vector3.one);
        using (StreamWriter writer = new StreamWriter(path))
        {
            for (int i = 0; i < 4; i++)
            {
                string line = $"{m[i, 0]} {m[i, 1]} {m[i, 2]} {m[i, 3]}";
                writer.WriteLine(line);
            }
        }
    }
}
`
2.generated point cloud and align them
`import os
import cv2
import numpy as np
import open3d as o3d
import OpenEXR
import Imath

def read_exr_depth(exr_path):
    exr_file = OpenEXR.InputFile(exr_path)
    dw = exr_file.header()['dataWindow']
    width = dw.max.x - dw.min.x + 1
    height = dw.max.y - dw.min.y + 1
    pt = Imath.PixelType(Imath.PixelType.FLOAT)
    depth_str = exr_file.channel('R', pt)
    depth = np.frombuffer(depth_str, dtype=np.float32).reshape((height, width))
    return depth

def unity_to_opencv_coords(points):

    points[:, 1] *= -1
    return points

def convert_unity_pose_to_opencv(pose_unity):

    R_u2cv = np.diag([1, -1, -1])  # Unity → OpenCV
    T_u2cv = np.eye(4)
    T_u2cv[:3, :3] = R_u2cv

    # Apply similarity transform: T * P * T⁻¹
    pose_cv = T_u2cv @ pose_unity @ np.linalg.inv(T_u2cv)
    return pose_cv

 
def load_pose(path):
    return np.loadtxt(path)  

def create_pointcloud(rgb_img, depth_img, fx, fy, cx, cy):
    h, w = depth_img.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h))
    z = depth_img.copy()
    z[z >= 0.99] = 0  

    x = (i - cx) * z / fx
    y = (j - cy) * z / fy

    points = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    colors = rgb_img[:, :, :3].reshape(-1, 3) / 255.0  # BGR to RGB
    return points, colors

def transform_points(points, pose):
    points_hom = np.hstack((points, np.ones((points.shape[0], 1))))
    # pose = np.linalg.inv(pose) 
    points_world = (pose @ points_hom.T).T[:, :3]
    return points_world

fx, fy, cx, cy = 1598.515 , 865.4121, 320, 240

data_dir = "/home/jimmy/task/Unity_Projects/Terrain/Assets/Captured"
num_frames = 10

all_points = []
all_colors = []
pose_lists = []
for i in range(20):
    rgb_path = os.path.join(data_dir, f"rgb_{i:04d}.png")
    depth_path = os.path.join(data_dir, f"depth_{i:04d}.exr")
    pose_path = os.path.join(data_dir, f"pose_{i:04d}.txt")

    rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB
    depth = read_exr_depth(depth_path)
    pose = load_pose(pose_path)
    pose = convert_unity_pose_to_opencv(pose)
    # print(f"pose = {pose}")
    T_center = np.zeros(3)
    # T_center[:3, 3] = -pose[:3, 3] 
    pose[:3,3] = T_center 
    pose_rotation = pose[:3,:3]

    pose[:3,:3] = pose_rotation
    # print(f"pose = {pose}")
    points, colors = create_pointcloud(rgb, depth, fx, fy, cx, cy)
    points = unity_to_opencv_coords(points)
    points_world = transform_points(points, pose)
    coord_pose = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
    coord_pose.transform(pose)
    all_points.append(points_world)
    # all_points.append(points)
    all_colors.append(colors)
    pose_lists.append(coord_pose)


all_points = np.vstack(all_points) 
all_colors = np.vstack(all_colors)

coord = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
# coord.transform(pose)
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(all_points)
pcd.colors = o3d.utility.Vector3dVector(all_colors)
o3d.visualization.draw_geometries([*pose_lists])
o3d.visualization.draw_geometries([pcd, coord])

`
3. misaligned picture 

![Image](https://github.com/user-attachments/assets/5937c267-ee4a-4d7e-9aac-04935ebf37ce)


Unity version is 2022.03.53

I look forward to your response. Thank you for your help!

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

I encountered a strange problem regarding the reconstruction of a 3D point cloud in Unity. #6213

coord.transform(pose)

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

I encountered a strange problem regarding the reconstruction of a 3D point cloud in Unity. #6213

Description

coord.transform(pose)

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions