# Source code for vision3d.ops._project (Sphinx "view source" page header — extraction artifact)
"""3D-to-2D camera projection utilities."""
import torch
from torch import Tensor
# [docs]  (Sphinx viewcode anchor — extraction artifact, not part of the module)
def project_to_image(
points_3d: Tensor,
extrinsics: Tensor,
intrinsics: Tensor,
) -> tuple[Tensor, Tensor]:
"""Project 3D points in lidar frame to pixel coordinates.
Args:
points_3d: Points in lidar frame ``[N, 3]``.
extrinsics: Lidar-to-camera transformation ``[4, 4]``.
intrinsics: Camera intrinsic matrix ``[3, 3]``.
Returns:
``(uv, depth)`` where ``uv`` is the pixel coordinates ``[N, 2]`` (u, v)
and ``depth`` is the camera-frame depth ``[N]``.
"""
n = points_3d.shape[0]
ones = torch.ones(n, 1, dtype=points_3d.dtype, device=points_3d.device)
pts_hom = torch.cat([points_3d, ones], dim=1) # [N, 4]
# Transform to camera frame: [4, 4] @ [4, N] -> [4, N] -> [N, 4]
pts_cam = (extrinsics @ pts_hom.T).T # [N, 4]
pts_cam_3d = pts_cam[:, :3] # [N, 3]
depth = pts_cam_3d[:, 2] # [N]
# Project to pixel: [3, 3] @ [3, N] -> [3, N] -> [N, 3]
pts_img = (intrinsics @ pts_cam_3d.T).T # [N, 3]
u = pts_img[:, 0] / depth
v = pts_img[:, 1] / depth
# Points behind the camera (depth <= 0) get NaN pixel coordinates
behind = depth <= 0
u = u.masked_fill(behind, torch.nan)
v = v.masked_fill(behind, torch.nan)
uv = torch.stack([u, v], dim=-1) # [N, 2]
return uv, depth