# Source code for vision3d.ops._project

"""3D-to-2D camera projection utilities."""

import torch
from torch import Tensor


def project_to_image(
    points_3d: Tensor,
    extrinsics: Tensor,
    intrinsics: Tensor,
) -> tuple[Tensor, Tensor]:
    """Project lidar-frame 3D points onto the image plane.

    Args:
        points_3d: Points in lidar frame ``[N, 3]``.
        extrinsics: Lidar-to-camera transformation ``[4, 4]``.
        intrinsics: Camera intrinsic matrix ``[3, 3]``.

    Returns:
        ``(uv, depth)`` where ``uv`` is the pixel coordinates ``[N, 2]``
        (u, v) and ``depth`` is the camera-frame depth ``[N]``. Points at
        or behind the camera plane (depth <= 0) receive NaN pixel
        coordinates; their depth is returned unchanged.
    """
    num_points = points_3d.shape[0]

    # Lift to homogeneous coordinates: [N, 3] -> [N, 4].
    pad = points_3d.new_ones((num_points, 1))
    homogeneous = torch.cat((points_3d, pad), dim=1)

    # Lidar frame -> camera frame (row-vector form of E @ p).
    cam_points = homogeneous @ extrinsics.T  # [N, 4]
    cam_xyz = cam_points[:, :3]
    depth = cam_xyz[:, 2]  # [N]

    # Camera frame -> image plane, still scaled by depth.
    image_points = cam_xyz @ intrinsics.T  # [N, 3]

    # Perspective divide. Assumes the intrinsics' last row is [0, 0, 1],
    # so depth equals image_points[:, 2].
    u = image_points[:, 0] / depth
    v = image_points[:, 1] / depth

    # Invalidate everything at or behind the camera plane.
    invalid = depth <= 0
    uv = torch.stack(
        (u.masked_fill(invalid, torch.nan), v.masked_fill(invalid, torch.nan)),
        dim=-1,
    )  # [N, 2]
    return uv, depth