Source code for vision3d.transforms.functional._geometry

"""Functional kernels for 3D geometric transforms."""

import math

import torch
from torch import Tensor
from torchvision.tv_tensors import TVTensor

from vision3d.tensors import (
    BoundingBox3DFormat,
    BoundingBoxes3D,
    CameraExtrinsics,
    PointCloud3D,
)

from ._registry import register_kernel

# Axis indices for flip
AXIS_INDEX = {"x": 0, "y": 1, "z": 2}

# Which rotation angles to negate for each flip axis.
# Convention: yaw=around Z (idx 6), pitch=around Y (idx 7), roll=around X (idx 8).
# A flip negates angles that rotate around axes OTHER than the flip axis.
_FLIP_NEGATE_YPR: dict[str, list[int]] = {
    "x": [6, 7],  # negate yaw (Z) and pitch (Y)
    "y": [6, 8],  # negate yaw (Z) and roll (X)
    "z": [7, 8],  # negate pitch (Y) and roll (X)
}


[docs] def flip_3d(inpt: Tensor, *, axis: str) -> Tensor: """Flip a tensor along a 3D spatial axis. This is the dispatcher entry point. Type-specific kernels are registered below. Args: inpt: Input tensor. axis: One of ``"x"``, ``"y"``, ``"z"``. Returns: Flipped tensor. """ return inpt
[docs] def flip_3d_point_cloud(points: Tensor, *, axis: str) -> Tensor: """Flip point cloud coordinates along ``axis``. Args: points: Point cloud tensor ``[..., 3+C]``. axis: One of ``"x"``, ``"y"``, ``"z"``. Returns: Flipped point cloud with the same shape. """ idx = AXIS_INDEX[axis] shape = points.shape points = points.clone().reshape(-1, shape[-1]) points[:, idx].neg_() return points.reshape(shape)
@register_kernel(flip_3d, PointCloud3D) def _flip_3d_point_cloud_kernel(points: Tensor, *, axis: str) -> Tensor: return flip_3d_point_cloud(points, axis=axis)
[docs] def flip_3d_bounding_boxes( boxes: Tensor, *, format: BoundingBox3DFormat, axis: str ) -> Tensor: """Flip 3D bounding boxes along ``axis``. Args: boxes: Bounding box tensor ``[..., K]``. format: Format of the boxes. axis: One of ``"x"``, ``"y"``, ``"z"``. Returns: Flipped bounding boxes with the same shape. """ idx = AXIS_INDEX[axis] shape = boxes.shape boxes = boxes.clone().reshape(-1, shape[-1]) if format is BoundingBox3DFormat.XYZXYZ: # Swap and negate: min/max corners flip lo, hi = idx, idx + 3 boxes[:, [lo, hi]] = boxes[:, [hi, lo]].neg_() elif format in ( BoundingBox3DFormat.XYZLWH, BoundingBox3DFormat.XYZLWHY, BoundingBox3DFormat.XYZLWHYPR, ): boxes[:, idx].neg_() if format is BoundingBox3DFormat.XYZLWHY: if axis in ("x", "y"): boxes[:, 6].neg_() elif format is BoundingBox3DFormat.XYZLWHYPR: for angle_idx in _FLIP_NEGATE_YPR[axis]: boxes[:, angle_idx].neg_() return boxes.reshape(shape)
@register_kernel(flip_3d, BoundingBoxes3D, tv_tensor_wrapper=False) def _flip_3d_bounding_boxes_dispatch(inpt: BoundingBoxes3D, *, axis: str) -> TVTensor: from vision3d.tensors import wrap output = flip_3d_bounding_boxes( inpt.as_subclass(Tensor), format=inpt.format, axis=axis ) return wrap(output, like=inpt)
[docs] def translate_3d(inpt: Tensor, *, offset: Tensor) -> Tensor: """Translate a tensor by a 3D offset. Dispatcher entry point. Type-specific kernels are registered below. Args: inpt: Input tensor. offset: Translation ``[3]`` as ``(tx, ty, tz)``. Returns: Translated tensor. """ return inpt
[docs] def translate_3d_point_cloud(points: Tensor, *, offset: Tensor) -> Tensor: """Translate point cloud coordinates by ``offset``. Args: points: Point cloud tensor ``[..., 3+C]``. offset: Translation ``[3]`` as ``(tx, ty, tz)``. Returns: Translated point cloud with the same shape. """ points = points.clone() points[..., :3] += offset return points
@register_kernel(translate_3d, PointCloud3D) def _translate_3d_point_cloud_kernel(points: Tensor, *, offset: Tensor) -> Tensor: return translate_3d_point_cloud(points, offset=offset)
[docs] def translate_3d_bounding_boxes( boxes: Tensor, *, format: BoundingBox3DFormat, offset: Tensor ) -> Tensor: """Translate 3D bounding boxes by ``offset``. Args: boxes: Bounding box tensor ``[..., K]``. format: Format of the boxes. offset: Translation ``[3]`` as ``(tx, ty, tz)``. Returns: Translated bounding boxes with the same shape. """ boxes = boxes.clone() if format is BoundingBox3DFormat.XYZXYZ: boxes[..., :3] += offset boxes[..., 3:6] += offset else: # XYZLWH, XYZLWHY, XYZLWHYPR: first 3 are center boxes[..., :3] += offset return boxes
@register_kernel(translate_3d, BoundingBoxes3D, tv_tensor_wrapper=False) def _translate_3d_bounding_boxes_dispatch( inpt: BoundingBoxes3D, *, offset: Tensor ) -> TVTensor: from vision3d.tensors import wrap output = translate_3d_bounding_boxes( inpt.as_subclass(Tensor), format=inpt.format, offset=offset ) return wrap(output, like=inpt)
[docs] def translate_3d_camera_extrinsics(extrinsics: Tensor, *, offset: Tensor) -> Tensor: """Update camera extrinsics after translating the lidar frame. The lidar-to-camera extrinsic translation changes because the lidar origin moved by ``offset`` in the lidar frame. Args: extrinsics: Extrinsic matrices ``[..., 4, 4]``. offset: Translation ``[3]`` as ``(tx, ty, tz)`` in lidar frame. Returns: Updated extrinsics with the same shape. """ extrinsics = extrinsics.clone() # E' = E @ T_inv where T_inv translates by -offset # This is equivalent to: E'[:3, 3] -= E[:3, :3] @ offset extrinsics[..., :3, 3] -= extrinsics[..., :3, :3] @ offset return extrinsics
@register_kernel(translate_3d, CameraExtrinsics) def _translate_3d_camera_extrinsics_kernel( extrinsics: Tensor, *, offset: Tensor ) -> Tensor: return translate_3d_camera_extrinsics(extrinsics, offset=offset) def _rotation_matrix(axis: Tensor, angle: float) -> Tensor: """Build a 3x3 rotation matrix from an axis and angle (radians). Uses Rodrigues' rotation formula. Args: axis: Unit vector ``[3]`` defining the rotation axis. angle: Rotation angle in radians. Returns: ``[3, 3]`` rotation matrix. """ axis = axis / axis.norm() K = torch.tensor( [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]], dtype=axis.dtype, ) R = ( torch.eye(3, dtype=axis.dtype) + math.sin(angle) * K + (1 - math.cos(angle)) * (K @ K) ) return R
[docs] def rotate_3d(inpt: Tensor, *, rotation_matrix: Tensor) -> Tensor: """Rotate a tensor by a 3x3 rotation matrix. Dispatcher entry point. Type-specific kernels are registered below. Args: inpt: Input tensor. rotation_matrix: ``[3, 3]`` rotation matrix. Returns: Rotated tensor. """ return inpt
[docs] def rotate_3d_point_cloud(points: Tensor, *, rotation_matrix: Tensor) -> Tensor: """Rotate point cloud coordinates by ``rotation_matrix``. Args: points: Point cloud tensor ``[..., 3+C]``. rotation_matrix: ``[3, 3]`` rotation matrix. Returns: Rotated point cloud with the same shape. """ points = points.clone() points[..., :3] = points[..., :3] @ rotation_matrix.T return points
@register_kernel(rotate_3d, PointCloud3D) def _rotate_3d_point_cloud_kernel(points: Tensor, *, rotation_matrix: Tensor) -> Tensor: return rotate_3d_point_cloud(points, rotation_matrix=rotation_matrix)
[docs] def rotate_3d_bounding_boxes( boxes: Tensor, *, format: BoundingBox3DFormat, rotation_matrix: Tensor ) -> Tensor: """Rotate 3D bounding boxes by ``rotation_matrix``. Only rotated formats are supported: - ``XYZLWHY``: only Z-axis rotations (pure yaw). - ``XYZLWHYPR``: arbitrary rotations. Axis-aligned formats (``XYZXYZ``, ``XYZLWH``) cannot represent rotation and will raise :class:`NotImplementedError`. Args: boxes: Bounding box tensor ``[..., K]``. format: Format of the boxes. rotation_matrix: ``[3, 3]`` rotation matrix. Returns: Rotated bounding boxes with the same shape. Raises: NotImplementedError: If ``format`` is axis-aligned. ValueError: If ``format`` is ``XYZLWHY`` and rotation is not pure yaw. """ if format in (BoundingBox3DFormat.XYZXYZ, BoundingBox3DFormat.XYZLWH): msg = f"Rotation is not supported for axis-aligned format {format.value}." raise NotImplementedError(msg) boxes = boxes.clone() boxes[..., :3] = boxes[..., :3] @ rotation_matrix.T if format is BoundingBox3DFormat.XYZLWHY: # Only pure Z-axis rotation is valid for yaw-only boxes if not _is_z_rotation(rotation_matrix): msg = "XYZLWHY only supports Z-axis rotation." raise ValueError(msg) yaw_delta = math.atan2( rotation_matrix[1, 0].item(), rotation_matrix[0, 0].item() ) boxes[..., 6] += yaw_delta elif format is BoundingBox3DFormat.XYZLWHYPR: yaw_delta = math.atan2( rotation_matrix[1, 0].item(), rotation_matrix[0, 0].item() ) pitch_delta = math.atan2( -rotation_matrix[2, 0].item(), math.sqrt( rotation_matrix[2, 1].item() ** 2 + rotation_matrix[2, 2].item() ** 2 ), ) roll_delta = math.atan2( rotation_matrix[2, 1].item(), rotation_matrix[2, 2].item() ) boxes[..., 6] += yaw_delta boxes[..., 7] += pitch_delta boxes[..., 8] += roll_delta return boxes
@register_kernel(rotate_3d, BoundingBoxes3D, tv_tensor_wrapper=False) def _rotate_3d_bounding_boxes_dispatch( inpt: BoundingBoxes3D, *, rotation_matrix: Tensor ) -> TVTensor: from vision3d.tensors import wrap output = rotate_3d_bounding_boxes( inpt.as_subclass(Tensor), format=inpt.format, rotation_matrix=rotation_matrix, ) return wrap(output, like=inpt) def _is_z_rotation(rotation_matrix: Tensor) -> bool: """Check if a rotation matrix is a pure rotation around Z. A pure Z rotation has the form ``[[c, -s, 0], [s, c, 0], [0, 0, 1]]``. Returns: True if the rotation is purely around Z. """ return ( abs(rotation_matrix[2, 0].item()) < 1e-6 and abs(rotation_matrix[2, 1].item()) < 1e-6 and abs(rotation_matrix[0, 2].item()) < 1e-6 and abs(rotation_matrix[1, 2].item()) < 1e-6 and abs(rotation_matrix[2, 2].item() - 1.0) < 1e-6 )
[docs] def rotate_3d_camera_extrinsics( extrinsics: Tensor, *, rotation_matrix: Tensor ) -> Tensor: """Update camera extrinsics after rotating the lidar frame. The lidar-to-camera extrinsic ``E`` satisfies ``p_cam = E @ p_lidar``. After rotating the lidar frame by ``R``, points become ``p' = R @ p``, so ``E' = E @ R_inv`` to keep ``p_cam = E' @ p'``. Args: extrinsics: Extrinsic matrices ``[..., 4, 4]``. rotation_matrix: ``[3, 3]`` rotation matrix. Returns: Updated extrinsics with the same shape. """ extrinsics = extrinsics.clone() R_inv_4x4 = torch.eye(4, dtype=extrinsics.dtype) R_inv_4x4[:3, :3] = rotation_matrix.T extrinsics = extrinsics @ R_inv_4x4 return extrinsics
@register_kernel(rotate_3d, CameraExtrinsics) def _rotate_3d_camera_extrinsics_kernel( extrinsics: Tensor, *, rotation_matrix: Tensor ) -> Tensor: return rotate_3d_camera_extrinsics(extrinsics, rotation_matrix=rotation_matrix)
[docs] def scale_3d(inpt: Tensor, *, factor: float) -> Tensor: """Scale a tensor by a uniform factor. Dispatcher entry point. Type-specific kernels are registered below. Args: inpt: Input tensor. factor: Scale factor. Returns: Scaled tensor. """ return inpt
[docs] def scale_3d_point_cloud(points: Tensor, *, factor: float) -> Tensor: """Scale point cloud coordinates by ``factor``. Args: points: Point cloud tensor ``[..., 3+C]``. factor: Scale factor. Returns: Scaled point cloud with the same shape. """ points = points.clone() points[..., :3] *= factor return points
[docs] def scale_3d_bounding_boxes( boxes: Tensor, *, format: BoundingBox3DFormat, factor: float ) -> Tensor: """Scale 3D bounding boxes by ``factor``. Scales both position and dimensions. Rotation angles are unchanged. Args: boxes: Bounding box tensor ``[..., K]``. format: Format of the boxes. factor: Scale factor. Returns: Scaled bounding boxes with the same shape. """ boxes = boxes.clone() if format is BoundingBox3DFormat.XYZXYZ: boxes[..., :6] *= factor else: # Center+size formats: scale center (0:3) and dimensions (3:6) boxes[..., :6] *= factor return boxes
@register_kernel(scale_3d, PointCloud3D) def _scale_3d_point_cloud_kernel(points: Tensor, *, factor: float) -> Tensor: return scale_3d_point_cloud(points, factor=factor) @register_kernel(scale_3d, BoundingBoxes3D, tv_tensor_wrapper=False) def _scale_3d_bounding_boxes_dispatch( inpt: BoundingBoxes3D, *, factor: float ) -> TVTensor: from vision3d.tensors import wrap output = scale_3d_bounding_boxes( inpt.as_subclass(Tensor), format=inpt.format, factor=factor ) return wrap(output, like=inpt)
[docs] def scale_3d_camera_extrinsics(extrinsics: Tensor, *, factor: float) -> Tensor: """Update camera extrinsics after scaling the lidar frame. Args: extrinsics: Extrinsic matrices ``[..., 4, 4]``. factor: Scale factor applied to the lidar frame. Returns: Updated extrinsics with the same shape. """ extrinsics = extrinsics.clone() extrinsics[..., :3, 3] *= factor return extrinsics
@register_kernel(scale_3d, CameraExtrinsics) def _scale_3d_camera_extrinsics_kernel(extrinsics: Tensor, *, factor: float) -> Tensor: return scale_3d_camera_extrinsics(extrinsics, factor=factor)