Skip to content

I encountered a strange problem regarding the reconstruction of a 3D point cloud in Unity. #6213

Open
@xiaolijz

Description

@xiaolijz

I captured RGB-D data and camera poses from Unity, then generated point clouds from the depth images. When I attempted to align the point clouds in a global coordinate frame using the camera poses for 3D reconstruction, the output was misaligned or distorted. Despite debugging, I couldn’t identify the root cause. A different approach also yielded unsatisfactory results.

Steps to reproduce the behavior:

  1. captured RGB-D and camera poses
    `using UnityEngine;
    using UnityEngine.Rendering.Universal;
    using System.Collections;
    using System.IO;

public class RGBDCapture : MonoBehaviour
{
public Camera cam;
private UniversalAdditionalCameraData cameraData;

private RenderTexture rgbRT;
private RenderTexture depthRT;

private int width = 640;
private int height = 480;

public int totalFrames = 30;       // 你要采集的帧数
public float interval = 1.5f;      // 每帧间隔秒数
private int currentFrame = 0;
private Quaternion? lastRotation = null;
private bool isCapturing = false;

void Start()
{
    if (cam == null)
    {
        Debug.LogError("Camera not assigned!");
        return;
    }

    cameraData = cam.GetComponent<UniversalAdditionalCameraData>();
    if (cameraData == null)
    {
        Debug.LogWarning("No UniversalAdditionalCameraData found on camera. Adding one.");
        cameraData = cam.gameObject.AddComponent<UniversalAdditionalCameraData>();
    }

    rgbRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
    depthRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
}

[ContextMenu("Start RGB-D Capture Sequence")]
public void StartCaptureSequence()
{
    if (!isCapturing)
    {
        currentFrame = 0;
        StartCoroutine(CaptureSequence());
    }
}

IEnumerator CaptureSequence()
{
    yield return new WaitForEndOfFrame();
    isCapturing = true;


    while (currentFrame < totalFrames)
    {
         Quaternion currentRotation = cam.transform.rotation;

        if (lastRotation.HasValue)
        {
            Quaternion delta = Quaternion.Inverse(lastRotation.Value) * currentRotation;
            float angle = Quaternion.Angle(Quaternion.identity, delta);
            Debug.Log($"[Frame {currentFrame}] Delta Rotation Angle = {angle:F4}°");
        }

        lastRotation = currentRotation;
        
        string rgbName = $"rgb_{currentFrame:D4}.png";
        string depthName = $"depth_{currentFrame:D4}.exr";
        string poseName = $"pose_{currentFrame:D4}.txt";

        // 🟡 先保存 Pose(图像采样前)
        SaveCameraPose(poseName);

        // --- Capture RGB ---
        cameraData.SetRenderer(0); // RGB Renderer
        cam.targetTexture = rgbRT;
        cam.Render();
        SaveRenderTextureToPNG(rgbRT, rgbName);

        // --- Capture Depth ---
        cameraData.SetRenderer(1); // Depth Renderer
        cam.targetTexture = depthRT;
        cam.Render();
        SaveRenderTextureToEXR(depthRT, depthName);

        cam.targetTexture = null;
        Debug.Log($"Captured frame {currentFrame}");

        currentFrame++;
        yield return new WaitForSeconds(interval);
    }

    isCapturing = false;
    Debug.Log("RGB-D sequence capture complete.");
}

void SaveRenderTextureToPNG(RenderTexture rt, string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);
    RenderTexture.active = rt;
    Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGB24, false);
    tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
    tex.Apply();

    byte[] bytes = tex.EncodeToPNG();
    File.WriteAllBytes(path, bytes);

    RenderTexture.active = null;
}

void SaveRenderTextureToEXR(RenderTexture rt, string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);

    RenderTexture.active = rt;

    Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGBAFloat, false);
    tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
    tex.Apply();

    byte[] bytes = tex.EncodeToEXR(Texture2D.EXRFlags.OutputAsFloat);
    File.WriteAllBytes(path, bytes);

    RenderTexture.active = null;
}

void SaveCameraPose(string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);

    // 获取相机的位姿(世界坐标系中的 4x4 矩阵)
    // Matrix4x4 m = cam.transform.localToWorldMatrix;
    Matrix4x4 m = Matrix4x4.TRS(cam.transform.position, cam.transform.rotation, Vector3.one);
    using (StreamWriter writer = new StreamWriter(path))
    {
        for (int i = 0; i < 4; i++)
        {
            string line = $"{m[i, 0]} {m[i, 1]} {m[i, 2]} {m[i, 3]}";
            writer.WriteLine(line);
        }
    }
}

}
2.generated point cloud and align themimport os
import cv2
import numpy as np
import open3d as o3d
import OpenEXR
import Imath

def read_exr_depth(exr_path):
exr_file = OpenEXR.InputFile(exr_path)
dw = exr_file.header()['dataWindow']
width = dw.max.x - dw.min.x + 1
height = dw.max.y - dw.min.y + 1
pt = Imath.PixelType(Imath.PixelType.FLOAT)
depth_str = exr_file.channel('R', pt)
depth = np.frombuffer(depth_str, dtype=np.float32).reshape((height, width))
return depth

def unity_to_opencv_coords(points):

points[:, 1] *= -1
return points

def convert_unity_pose_to_opencv(pose_unity):

R_u2cv = np.diag([1, -1, -1])  # Unity → OpenCV
T_u2cv = np.eye(4)
T_u2cv[:3, :3] = R_u2cv

# Apply similarity transform: T * P * T⁻¹
pose_cv = T_u2cv @ pose_unity @ np.linalg.inv(T_u2cv)
return pose_cv

def load_pose(path):
return np.loadtxt(path)

def create_pointcloud(rgb_img, depth_img, fx, fy, cx, cy):
h, w = depth_img.shape
i, j = np.meshgrid(np.arange(w), np.arange(h))
z = depth_img.copy()
z[z >= 0.99] = 0

x = (i - cx) * z / fx
y = (j - cy) * z / fy

points = np.stack((x, y, z), axis=-1).reshape(-1, 3)
colors = rgb_img[:, :, :3].reshape(-1, 3) / 255.0  # BGR to RGB
return points, colors

def transform_points(points, pose):
points_hom = np.hstack((points, np.ones((points.shape[0], 1))))
# pose = np.linalg.inv(pose)
points_world = (pose @ points_hom.T).T[:, :3]
return points_world

fx, fy, cx, cy = 1598.515 , 865.4121, 320, 240

data_dir = "/home/jimmy/task/Unity_Projects/Terrain/Assets/Captured"
num_frames = 10

all_points = []
all_colors = []
pose_lists = []
for i in range(20):
rgb_path = os.path.join(data_dir, f"rgb_{i:04d}.png")
depth_path = os.path.join(data_dir, f"depth_{i:04d}.exr")
pose_path = os.path.join(data_dir, f"pose_{i:04d}.txt")

rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB
depth = read_exr_depth(depth_path)
pose = load_pose(pose_path)
pose = convert_unity_pose_to_opencv(pose)
# print(f"pose = {pose}")
T_center = np.zeros(3)
# T_center[:3, 3] = -pose[:3, 3] 
pose[:3,3] = T_center 
pose_rotation = pose[:3,:3]

pose[:3,:3] = pose_rotation
# print(f"pose = {pose}")
points, colors = create_pointcloud(rgb, depth, fx, fy, cx, cy)
points = unity_to_opencv_coords(points)
points_world = transform_points(points, pose)
coord_pose = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
coord_pose.transform(pose)
all_points.append(points_world)
# all_points.append(points)
all_colors.append(colors)
pose_lists.append(coord_pose)

all_points = np.vstack(all_points)
all_colors = np.vstack(all_colors)

coord = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)

coord.transform(pose)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(all_points)
pcd.colors = o3d.utility.Vector3dVector(all_colors)
o3d.visualization.draw_geometries([*pose_lists])
o3d.visualization.draw_geometries([pcd, coord])

`
3. misaligned picture

Image

Unity version is 2022.03.53

I look forward to your response. Thank you for your help!

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugIssue describes a potential bug in ml-agents.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions