# Source code for vision3d.datasets.nuscenes

"""`nuScenes <https://www.nuscenes.org/>`_ Dataset."""

import json
import os
import sys
from pathlib import Path
from typing import Any, ClassVar, Literal, TypedDict, overload, override

if sys.version_info >= (3, 13):
    from typing import ReadOnly
else:
    from typing_extensions import ReadOnly

import numpy as np
import torch
from torch import Tensor
from torch.utils.data import Dataset
from torchvision.datasets.utils import download_and_extract_archive
from torchvision.io import ImageReadMode, decode_image

from vision3d.datasets import FusionInputs, SampleTargets
from vision3d.tensors import (
    BoundingBox3DFormat,
    BoundingBoxes3D,
    CameraExtrinsics,
    CameraImages,
    CameraIntrinsics,
    PointCloud3D,
)

# Detection class set used for evaluation. Mirrors
# ``nuscenes.eval.detection.constants.DETECTION_NAMES``.
#
# ``NuScenes3D.classes`` is this tuple and ``NuScenes3D.class_to_idx``
# enumerates it, so the position of a name here is its integer class index.
# Reordering the entries therefore changes every label index.
_DETECTION_NAMES = (
    "car",
    "truck",
    "bus",
    "trailer",
    "construction_vehicle",
    "pedestrian",
    "motorcycle",
    "bicycle",
    "traffic_cone",
    "barrier",
)

# Mapping from fine-grained nuScenes category names to the 10 detection
# classes. Mirrors
# ``nuscenes.eval.detection.utils.category_to_detection_name``.
#
# Only 14 of the 23 fine-grained categories appear as keys; the remaining
# categories have no entry in this mapping. Several fine-grained names
# collapse onto one detection class (both bus variants -> "bus", all four
# pedestrian variants -> "pedestrian").
_CATEGORY_TO_DETECTION = {
    "movable_object.barrier": "barrier",
    "vehicle.bicycle": "bicycle",
    "vehicle.bus.bendy": "bus",
    "vehicle.bus.rigid": "bus",
    "vehicle.car": "car",
    "vehicle.construction": "construction_vehicle",
    "vehicle.motorcycle": "motorcycle",
    "human.pedestrian.adult": "pedestrian",
    "human.pedestrian.child": "pedestrian",
    "human.pedestrian.construction_worker": "pedestrian",
    "human.pedestrian.police_officer": "pedestrian",
    "movable_object.trafficcone": "traffic_cone",
    "vehicle.trailer": "trailer",
    "vehicle.truck": "truck",
}

# Subset of ``nuscenes.NuScenes.table_names`` from nuscenes-devkit
# required for 3D object detection tasks.
#
# ``_NuScenesDB`` stores each table in an attribute of the same name and
# iterates this tuple to build its per-table token index, so every name
# listed here must match one of those attributes.
_TABLE_NAMES = (
    "category",
    "instance",
    "sensor",
    "calibrated_sensor",
    "ego_pose",
    "scene",
    "sample",
    "sample_data",
    "sample_annotation",
)


# Hard-coded scene splits, copied from ``nuscenes.utils.splits`` in the
# nuscenes-devkit: 700 training, 150 validation and 150 test scenes.
_TRAIN = (
    "scene-0001",
    "scene-0002",
    "scene-0004",
    "scene-0005",
    "scene-0006",
    "scene-0007",
    "scene-0008",
    "scene-0009",
    "scene-0010",
    "scene-0011",
    "scene-0019",
    "scene-0020",
    "scene-0021",
    "scene-0022",
    "scene-0023",
    "scene-0024",
    "scene-0025",
    "scene-0026",
    "scene-0027",
    "scene-0028",
    "scene-0029",
    "scene-0030",
    "scene-0031",
    "scene-0032",
    "scene-0033",
    "scene-0034",
    "scene-0041",
    "scene-0042",
    "scene-0043",
    "scene-0044",
    "scene-0045",
    "scene-0046",
    "scene-0047",
    "scene-0048",
    "scene-0049",
    "scene-0050",
    "scene-0051",
    "scene-0052",
    "scene-0053",
    "scene-0054",
    "scene-0055",
    "scene-0056",
    "scene-0057",
    "scene-0058",
    "scene-0059",
    "scene-0060",
    "scene-0061",
    "scene-0062",
    "scene-0063",
    "scene-0064",
    "scene-0065",
    "scene-0066",
    "scene-0067",
    "scene-0068",
    "scene-0069",
    "scene-0070",
    "scene-0071",
    "scene-0072",
    "scene-0073",
    "scene-0074",
    "scene-0075",
    "scene-0076",
    "scene-0120",
    "scene-0121",
    "scene-0122",
    "scene-0123",
    "scene-0124",
    "scene-0125",
    "scene-0126",
    "scene-0127",
    "scene-0128",
    "scene-0129",
    "scene-0130",
    "scene-0131",
    "scene-0132",
    "scene-0133",
    "scene-0134",
    "scene-0135",
    "scene-0138",
    "scene-0139",
    "scene-0149",
    "scene-0150",
    "scene-0151",
    "scene-0152",
    "scene-0154",
    "scene-0155",
    "scene-0157",
    "scene-0158",
    "scene-0159",
    "scene-0160",
    "scene-0161",
    "scene-0162",
    "scene-0163",
    "scene-0164",
    "scene-0165",
    "scene-0166",
    "scene-0167",
    "scene-0168",
    "scene-0170",
    "scene-0171",
    "scene-0172",
    "scene-0173",
    "scene-0174",
    "scene-0175",
    "scene-0176",
    "scene-0177",
    "scene-0178",
    "scene-0179",
    "scene-0180",
    "scene-0181",
    "scene-0182",
    "scene-0183",
    "scene-0184",
    "scene-0185",
    "scene-0187",
    "scene-0188",
    "scene-0190",
    "scene-0191",
    "scene-0192",
    "scene-0193",
    "scene-0194",
    "scene-0195",
    "scene-0196",
    "scene-0199",
    "scene-0200",
    "scene-0202",
    "scene-0203",
    "scene-0204",
    "scene-0206",
    "scene-0207",
    "scene-0208",
    "scene-0209",
    "scene-0210",
    "scene-0211",
    "scene-0212",
    "scene-0213",
    "scene-0214",
    "scene-0218",
    "scene-0219",
    "scene-0220",
    "scene-0222",
    "scene-0224",
    "scene-0225",
    "scene-0226",
    "scene-0227",
    "scene-0228",
    "scene-0229",
    "scene-0230",
    "scene-0231",
    "scene-0232",
    "scene-0233",
    "scene-0234",
    "scene-0235",
    "scene-0236",
    "scene-0237",
    "scene-0238",
    "scene-0239",
    "scene-0240",
    "scene-0241",
    "scene-0242",
    "scene-0243",
    "scene-0244",
    "scene-0245",
    "scene-0246",
    "scene-0247",
    "scene-0248",
    "scene-0249",
    "scene-0250",
    "scene-0251",
    "scene-0252",
    "scene-0253",
    "scene-0254",
    "scene-0255",
    "scene-0256",
    "scene-0257",
    "scene-0258",
    "scene-0259",
    "scene-0260",
    "scene-0261",
    "scene-0262",
    "scene-0263",
    "scene-0264",
    "scene-0283",
    "scene-0284",
    "scene-0285",
    "scene-0286",
    "scene-0287",
    "scene-0288",
    "scene-0289",
    "scene-0290",
    "scene-0291",
    "scene-0292",
    "scene-0293",
    "scene-0294",
    "scene-0295",
    "scene-0296",
    "scene-0297",
    "scene-0298",
    "scene-0299",
    "scene-0300",
    "scene-0301",
    "scene-0302",
    "scene-0303",
    "scene-0304",
    "scene-0305",
    "scene-0306",
    "scene-0315",
    "scene-0316",
    "scene-0317",
    "scene-0318",
    "scene-0321",
    "scene-0323",
    "scene-0324",
    "scene-0328",
    "scene-0347",
    "scene-0348",
    "scene-0349",
    "scene-0350",
    "scene-0351",
    "scene-0352",
    "scene-0353",
    "scene-0354",
    "scene-0355",
    "scene-0356",
    "scene-0357",
    "scene-0358",
    "scene-0359",
    "scene-0360",
    "scene-0361",
    "scene-0362",
    "scene-0363",
    "scene-0364",
    "scene-0365",
    "scene-0366",
    "scene-0367",
    "scene-0368",
    "scene-0369",
    "scene-0370",
    "scene-0371",
    "scene-0372",
    "scene-0373",
    "scene-0374",
    "scene-0375",
    "scene-0376",
    "scene-0377",
    "scene-0378",
    "scene-0379",
    "scene-0380",
    "scene-0381",
    "scene-0382",
    "scene-0383",
    "scene-0384",
    "scene-0385",
    "scene-0386",
    "scene-0388",
    "scene-0389",
    "scene-0390",
    "scene-0391",
    "scene-0392",
    "scene-0393",
    "scene-0394",
    "scene-0395",
    "scene-0396",
    "scene-0397",
    "scene-0398",
    "scene-0399",
    "scene-0400",
    "scene-0401",
    "scene-0402",
    "scene-0403",
    "scene-0405",
    "scene-0406",
    "scene-0407",
    "scene-0408",
    "scene-0410",
    "scene-0411",
    "scene-0412",
    "scene-0413",
    "scene-0414",
    "scene-0415",
    "scene-0416",
    "scene-0417",
    "scene-0418",
    "scene-0419",
    "scene-0420",
    "scene-0421",
    "scene-0422",
    "scene-0423",
    "scene-0424",
    "scene-0425",
    "scene-0426",
    "scene-0427",
    "scene-0428",
    "scene-0429",
    "scene-0430",
    "scene-0431",
    "scene-0432",
    "scene-0433",
    "scene-0434",
    "scene-0435",
    "scene-0436",
    "scene-0437",
    "scene-0438",
    "scene-0439",
    "scene-0440",
    "scene-0441",
    "scene-0442",
    "scene-0443",
    "scene-0444",
    "scene-0445",
    "scene-0446",
    "scene-0447",
    "scene-0448",
    "scene-0449",
    "scene-0450",
    "scene-0451",
    "scene-0452",
    "scene-0453",
    "scene-0454",
    "scene-0455",
    "scene-0456",
    "scene-0457",
    "scene-0458",
    "scene-0459",
    "scene-0461",
    "scene-0462",
    "scene-0463",
    "scene-0464",
    "scene-0465",
    "scene-0467",
    "scene-0468",
    "scene-0469",
    "scene-0471",
    "scene-0472",
    "scene-0474",
    "scene-0475",
    "scene-0476",
    "scene-0477",
    "scene-0478",
    "scene-0479",
    "scene-0480",
    "scene-0499",
    "scene-0500",
    "scene-0501",
    "scene-0502",
    "scene-0504",
    "scene-0505",
    "scene-0506",
    "scene-0507",
    "scene-0508",
    "scene-0509",
    "scene-0510",
    "scene-0511",
    "scene-0512",
    "scene-0513",
    "scene-0514",
    "scene-0515",
    "scene-0517",
    "scene-0518",
    "scene-0525",
    "scene-0526",
    "scene-0527",
    "scene-0528",
    "scene-0529",
    "scene-0530",
    "scene-0531",
    "scene-0532",
    "scene-0533",
    "scene-0534",
    "scene-0535",
    "scene-0536",
    "scene-0537",
    "scene-0538",
    "scene-0539",
    "scene-0541",
    "scene-0542",
    "scene-0543",
    "scene-0544",
    "scene-0545",
    "scene-0546",
    "scene-0566",
    "scene-0568",
    "scene-0570",
    "scene-0571",
    "scene-0572",
    "scene-0573",
    "scene-0574",
    "scene-0575",
    "scene-0576",
    "scene-0577",
    "scene-0578",
    "scene-0580",
    "scene-0582",
    "scene-0583",
    "scene-0584",
    "scene-0585",
    "scene-0586",
    "scene-0587",
    "scene-0588",
    "scene-0589",
    "scene-0590",
    "scene-0591",
    "scene-0592",
    "scene-0593",
    "scene-0594",
    "scene-0595",
    "scene-0596",
    "scene-0597",
    "scene-0598",
    "scene-0599",
    "scene-0600",
    "scene-0639",
    "scene-0640",
    "scene-0641",
    "scene-0642",
    "scene-0643",
    "scene-0644",
    "scene-0645",
    "scene-0646",
    "scene-0647",
    "scene-0648",
    "scene-0649",
    "scene-0650",
    "scene-0651",
    "scene-0652",
    "scene-0653",
    "scene-0654",
    "scene-0655",
    "scene-0656",
    "scene-0657",
    "scene-0658",
    "scene-0659",
    "scene-0660",
    "scene-0661",
    "scene-0662",
    "scene-0663",
    "scene-0664",
    "scene-0665",
    "scene-0666",
    "scene-0667",
    "scene-0668",
    "scene-0669",
    "scene-0670",
    "scene-0671",
    "scene-0672",
    "scene-0673",
    "scene-0674",
    "scene-0675",
    "scene-0676",
    "scene-0677",
    "scene-0678",
    "scene-0679",
    "scene-0681",
    "scene-0683",
    "scene-0684",
    "scene-0685",
    "scene-0686",
    "scene-0687",
    "scene-0688",
    "scene-0689",
    "scene-0695",
    "scene-0696",
    "scene-0697",
    "scene-0698",
    "scene-0700",
    "scene-0701",
    "scene-0703",
    "scene-0704",
    "scene-0705",
    "scene-0706",
    "scene-0707",
    "scene-0708",
    "scene-0709",
    "scene-0710",
    "scene-0711",
    "scene-0712",
    "scene-0713",
    "scene-0714",
    "scene-0715",
    "scene-0716",
    "scene-0717",
    "scene-0718",
    "scene-0719",
    "scene-0726",
    "scene-0727",
    "scene-0728",
    "scene-0730",
    "scene-0731",
    "scene-0733",
    "scene-0734",
    "scene-0735",
    "scene-0736",
    "scene-0737",
    "scene-0738",
    "scene-0739",
    "scene-0740",
    "scene-0741",
    "scene-0744",
    "scene-0746",
    "scene-0747",
    "scene-0749",
    "scene-0750",
    "scene-0751",
    "scene-0752",
    "scene-0757",
    "scene-0758",
    "scene-0759",
    "scene-0760",
    "scene-0761",
    "scene-0762",
    "scene-0763",
    "scene-0764",
    "scene-0765",
    "scene-0767",
    "scene-0768",
    "scene-0769",
    "scene-0786",
    "scene-0787",
    "scene-0789",
    "scene-0790",
    "scene-0791",
    "scene-0792",
    "scene-0803",
    "scene-0804",
    "scene-0805",
    "scene-0806",
    "scene-0808",
    "scene-0809",
    "scene-0810",
    "scene-0811",
    "scene-0812",
    "scene-0813",
    "scene-0815",
    "scene-0816",
    "scene-0817",
    "scene-0819",
    "scene-0820",
    "scene-0821",
    "scene-0822",
    "scene-0847",
    "scene-0848",
    "scene-0849",
    "scene-0850",
    "scene-0851",
    "scene-0852",
    "scene-0853",
    "scene-0854",
    "scene-0855",
    "scene-0856",
    "scene-0858",
    "scene-0860",
    "scene-0861",
    "scene-0862",
    "scene-0863",
    "scene-0864",
    "scene-0865",
    "scene-0866",
    "scene-0868",
    "scene-0869",
    "scene-0870",
    "scene-0871",
    "scene-0872",
    "scene-0873",
    "scene-0875",
    "scene-0876",
    "scene-0877",
    "scene-0878",
    "scene-0880",
    "scene-0882",
    "scene-0883",
    "scene-0884",
    "scene-0885",
    "scene-0886",
    "scene-0887",
    "scene-0888",
    "scene-0889",
    "scene-0890",
    "scene-0891",
    "scene-0892",
    "scene-0893",
    "scene-0894",
    "scene-0895",
    "scene-0896",
    "scene-0897",
    "scene-0898",
    "scene-0899",
    "scene-0900",
    "scene-0901",
    "scene-0902",
    "scene-0903",
    "scene-0945",
    "scene-0947",
    "scene-0949",
    "scene-0952",
    "scene-0953",
    "scene-0955",
    "scene-0956",
    "scene-0957",
    "scene-0958",
    "scene-0959",
    "scene-0960",
    "scene-0961",
    "scene-0975",
    "scene-0976",
    "scene-0977",
    "scene-0978",
    "scene-0979",
    "scene-0980",
    "scene-0981",
    "scene-0982",
    "scene-0983",
    "scene-0984",
    "scene-0988",
    "scene-0989",
    "scene-0990",
    "scene-0991",
    "scene-0992",
    "scene-0994",
    "scene-0995",
    "scene-0996",
    "scene-0997",
    "scene-0998",
    "scene-0999",
    "scene-1000",
    "scene-1001",
    "scene-1002",
    "scene-1003",
    "scene-1004",
    "scene-1005",
    "scene-1006",
    "scene-1007",
    "scene-1008",
    "scene-1009",
    "scene-1010",
    "scene-1011",
    "scene-1012",
    "scene-1013",
    "scene-1014",
    "scene-1015",
    "scene-1016",
    "scene-1017",
    "scene-1018",
    "scene-1019",
    "scene-1020",
    "scene-1021",
    "scene-1022",
    "scene-1023",
    "scene-1024",
    "scene-1025",
    "scene-1044",
    "scene-1045",
    "scene-1046",
    "scene-1047",
    "scene-1048",
    "scene-1049",
    "scene-1050",
    "scene-1051",
    "scene-1052",
    "scene-1053",
    "scene-1054",
    "scene-1055",
    "scene-1056",
    "scene-1057",
    "scene-1058",
    "scene-1074",
    "scene-1075",
    "scene-1076",
    "scene-1077",
    "scene-1078",
    "scene-1079",
    "scene-1080",
    "scene-1081",
    "scene-1082",
    "scene-1083",
    "scene-1084",
    "scene-1085",
    "scene-1086",
    "scene-1087",
    "scene-1088",
    "scene-1089",
    "scene-1090",
    "scene-1091",
    "scene-1092",
    "scene-1093",
    "scene-1094",
    "scene-1095",
    "scene-1096",
    "scene-1097",
    "scene-1098",
    "scene-1099",
    "scene-1100",
    "scene-1101",
    "scene-1102",
    "scene-1104",
    "scene-1105",
    "scene-1106",
    "scene-1107",
    "scene-1108",
    "scene-1109",
    "scene-1110",
)

_VAL = (
    "scene-0003",
    "scene-0012",
    "scene-0013",
    "scene-0014",
    "scene-0015",
    "scene-0016",
    "scene-0017",
    "scene-0018",
    "scene-0035",
    "scene-0036",
    "scene-0038",
    "scene-0039",
    "scene-0092",
    "scene-0093",
    "scene-0094",
    "scene-0095",
    "scene-0096",
    "scene-0097",
    "scene-0098",
    "scene-0099",
    "scene-0100",
    "scene-0101",
    "scene-0102",
    "scene-0103",
    "scene-0104",
    "scene-0105",
    "scene-0106",
    "scene-0107",
    "scene-0108",
    "scene-0109",
    "scene-0110",
    "scene-0221",
    "scene-0268",
    "scene-0269",
    "scene-0270",
    "scene-0271",
    "scene-0272",
    "scene-0273",
    "scene-0274",
    "scene-0275",
    "scene-0276",
    "scene-0277",
    "scene-0278",
    "scene-0329",
    "scene-0330",
    "scene-0331",
    "scene-0332",
    "scene-0344",
    "scene-0345",
    "scene-0346",
    "scene-0519",
    "scene-0520",
    "scene-0521",
    "scene-0522",
    "scene-0523",
    "scene-0524",
    "scene-0552",
    "scene-0553",
    "scene-0554",
    "scene-0555",
    "scene-0556",
    "scene-0557",
    "scene-0558",
    "scene-0559",
    "scene-0560",
    "scene-0561",
    "scene-0562",
    "scene-0563",
    "scene-0564",
    "scene-0565",
    "scene-0625",
    "scene-0626",
    "scene-0627",
    "scene-0629",
    "scene-0630",
    "scene-0632",
    "scene-0633",
    "scene-0634",
    "scene-0635",
    "scene-0636",
    "scene-0637",
    "scene-0638",
    "scene-0770",
    "scene-0771",
    "scene-0775",
    "scene-0777",
    "scene-0778",
    "scene-0780",
    "scene-0781",
    "scene-0782",
    "scene-0783",
    "scene-0784",
    "scene-0794",
    "scene-0795",
    "scene-0796",
    "scene-0797",
    "scene-0798",
    "scene-0799",
    "scene-0800",
    "scene-0802",
    "scene-0904",
    "scene-0905",
    "scene-0906",
    "scene-0907",
    "scene-0908",
    "scene-0909",
    "scene-0910",
    "scene-0911",
    "scene-0912",
    "scene-0913",
    "scene-0914",
    "scene-0915",
    "scene-0916",
    "scene-0917",
    "scene-0919",
    "scene-0920",
    "scene-0921",
    "scene-0922",
    "scene-0923",
    "scene-0924",
    "scene-0925",
    "scene-0926",
    "scene-0927",
    "scene-0928",
    "scene-0929",
    "scene-0930",
    "scene-0931",
    "scene-0962",
    "scene-0963",
    "scene-0966",
    "scene-0967",
    "scene-0968",
    "scene-0969",
    "scene-0971",
    "scene-0972",
    "scene-1059",
    "scene-1060",
    "scene-1061",
    "scene-1062",
    "scene-1063",
    "scene-1064",
    "scene-1065",
    "scene-1066",
    "scene-1067",
    "scene-1068",
    "scene-1069",
    "scene-1070",
    "scene-1071",
    "scene-1072",
    "scene-1073",
)

_TEST = (
    "scene-0077",
    "scene-0078",
    "scene-0079",
    "scene-0080",
    "scene-0081",
    "scene-0082",
    "scene-0083",
    "scene-0084",
    "scene-0085",
    "scene-0086",
    "scene-0087",
    "scene-0088",
    "scene-0089",
    "scene-0090",
    "scene-0091",
    "scene-0111",
    "scene-0112",
    "scene-0113",
    "scene-0114",
    "scene-0115",
    "scene-0116",
    "scene-0117",
    "scene-0118",
    "scene-0119",
    "scene-0140",
    "scene-0142",
    "scene-0143",
    "scene-0144",
    "scene-0145",
    "scene-0146",
    "scene-0147",
    "scene-0148",
    "scene-0265",
    "scene-0266",
    "scene-0279",
    "scene-0280",
    "scene-0281",
    "scene-0282",
    "scene-0307",
    "scene-0308",
    "scene-0309",
    "scene-0310",
    "scene-0311",
    "scene-0312",
    "scene-0313",
    "scene-0314",
    "scene-0333",
    "scene-0334",
    "scene-0335",
    "scene-0336",
    "scene-0337",
    "scene-0338",
    "scene-0339",
    "scene-0340",
    "scene-0341",
    "scene-0342",
    "scene-0343",
    "scene-0481",
    "scene-0482",
    "scene-0483",
    "scene-0484",
    "scene-0485",
    "scene-0486",
    "scene-0487",
    "scene-0488",
    "scene-0489",
    "scene-0490",
    "scene-0491",
    "scene-0492",
    "scene-0493",
    "scene-0494",
    "scene-0495",
    "scene-0496",
    "scene-0497",
    "scene-0498",
    "scene-0547",
    "scene-0548",
    "scene-0549",
    "scene-0550",
    "scene-0551",
    "scene-0601",
    "scene-0602",
    "scene-0603",
    "scene-0604",
    "scene-0606",
    "scene-0607",
    "scene-0608",
    "scene-0609",
    "scene-0610",
    "scene-0611",
    "scene-0612",
    "scene-0613",
    "scene-0614",
    "scene-0615",
    "scene-0616",
    "scene-0617",
    "scene-0618",
    "scene-0619",
    "scene-0620",
    "scene-0621",
    "scene-0622",
    "scene-0623",
    "scene-0624",
    "scene-0827",
    "scene-0828",
    "scene-0829",
    "scene-0830",
    "scene-0831",
    "scene-0833",
    "scene-0834",
    "scene-0835",
    "scene-0836",
    "scene-0837",
    "scene-0838",
    "scene-0839",
    "scene-0840",
    "scene-0841",
    "scene-0842",
    "scene-0844",
    "scene-0845",
    "scene-0846",
    "scene-0932",
    "scene-0933",
    "scene-0935",
    "scene-0936",
    "scene-0937",
    "scene-0938",
    "scene-0939",
    "scene-0940",
    "scene-0941",
    "scene-0942",
    "scene-0943",
    "scene-1026",
    "scene-1027",
    "scene-1028",
    "scene-1029",
    "scene-1030",
    "scene-1031",
    "scene-1032",
    "scene-1033",
    "scene-1034",
    "scene-1035",
    "scene-1036",
    "scene-1037",
    "scene-1038",
    "scene-1039",
    "scene-1040",
    "scene-1041",
    "scene-1042",
    "scene-1043",
)

# Scene names of the ``v1.0-mini`` release, divided into its train part
# (8 scenes) ...
_MINI_TRAIN = (
    "scene-0061",
    "scene-0553",
    "scene-0655",
    "scene-0757",
    "scene-0796",
    "scene-1077",
    "scene-1094",
    "scene-1100",
)

# ... and its val part (2 scenes).
_MINI_VAL = ("scene-0103", "scene-0916")

# Dataset version -> split name -> scene names belonging to that split.
# ``v1.0-mini`` and ``v1.0-trainval`` expose "train" and "val";
# ``v1.0-test`` exposes only "test".
_VERSION_SPLITS = {
    "v1.0-mini": {"train": _MINI_TRAIN, "val": _MINI_VAL},
    "v1.0-trainval": {"train": _TRAIN, "val": _VAL},
    "v1.0-test": {"test": _TEST},
}


class _Category(TypedDict):
    """One record of the ``category`` table, as loaded from ``category.json``.

    ``_NuScenesDB.__init__`` copies ``name`` onto every annotation whose
    instance references this record (stored there as ``category_name``).
    """

    # Key under which ``_NuScenesDB.get("category", ...)`` finds the record.
    token: ReadOnly[str]
    # Fine-grained category string.
    name: ReadOnly[str]
    description: ReadOnly[str]


class _Instance(TypedDict):
    """One record of the ``instance`` table, as loaded from ``instance.json``.

    ``_NuScenesDB.__init__`` uses it as the join step between an annotation
    and its category: ``annotation.instance_token -> instance ->
    instance.category_token -> category``.
    """

    # Key under which ``_NuScenesDB.get("instance", ...)`` finds the record.
    token: ReadOnly[str]
    # Token of the ``category`` record this instance belongs to.
    category_token: ReadOnly[str]
    nbr_annotations: ReadOnly[int]
    first_annotation_token: ReadOnly[str]
    last_annotation_token: ReadOnly[str]


class _Sensor(TypedDict):
    """One record of the ``sensor`` table, as loaded from ``sensor.json``.

    ``_NuScenesDB.__init__`` copies ``channel`` and ``modality`` onto every
    ``sample_data`` record captured by this sensor.
    """

    # Key under which ``_NuScenesDB.get("sensor", ...)`` finds the record.
    token: ReadOnly[str]
    # Channel name; also used as the key of ``sample["data"]``
    # (e.g. ``"LIDAR_TOP"``, ``"CAM_FRONT"``).
    channel: ReadOnly[str]
    # Stored on ``sample_data`` records as ``sensor_modality``.
    modality: ReadOnly[str]


class _CalibratedSensor(TypedDict):
    """One record of the ``calibrated_sensor`` table.

    Links a ``sample_data`` record to its ``sensor`` and carries the sensor
    calibration. ``NuScenes3D.__getitem__`` passes ``translation`` and
    ``rotation`` to ``_make_transform`` and reads ``camera_intrinsic`` for
    the cameras.
    """

    # Key under which ``_NuScenesDB.get("calibrated_sensor", ...)`` finds
    # the record.
    token: ReadOnly[str]
    # Token of the ``sensor`` record this calibration belongs to.
    sensor_token: ReadOnly[str]
    # NOTE(review): element count and conventions of ``translation`` /
    # ``rotation`` are whatever ``_make_transform`` expects - confirm there.
    translation: ReadOnly[list[float]]
    rotation: ReadOnly[list[float]]
    # ``camera_intrinsic`` is a 3x3 matrix for cameras and an empty list for
    # other modalities (lidar, radar).
    camera_intrinsic: ReadOnly[list[list[float]]]


class _EgoPose(TypedDict):
    """One record of the ``ego_pose`` table, as loaded from ``ego_pose.json``.

    Referenced from ``sample_data`` records through ``ego_pose_token``.
    ``NuScenes3D.__getitem__`` passes ``translation`` and ``rotation`` to
    ``_make_transform`` when composing the sensor-to-global transform.
    """

    # Key under which ``_NuScenesDB.get("ego_pose", ...)`` finds the record.
    token: ReadOnly[str]
    timestamp: ReadOnly[int]
    # NOTE(review): element count and conventions of ``rotation`` /
    # ``translation`` are whatever ``_make_transform`` expects - confirm there.
    rotation: ReadOnly[list[float]]
    translation: ReadOnly[list[float]]


class _Scene(TypedDict):
    """One record of the ``scene`` table, as loaded from ``scene.json``.

    ``NuScenes3D.__init__`` keeps a scene when its ``name`` is part of the
    requested split and then walks its samples starting at
    ``first_sample_token``.
    """

    # Key under which ``_NuScenesDB.get("scene", ...)`` finds the record.
    token: ReadOnly[str]
    log_token: ReadOnly[str]
    nbr_samples: ReadOnly[int]
    # Token of the sample at which the scene's sample chain starts.
    first_sample_token: ReadOnly[str]
    last_sample_token: ReadOnly[str]
    # Scene name in the form used by the split tuples (e.g. ``"scene-0061"``).
    name: ReadOnly[str]
    description: ReadOnly[str]


class _Sample(TypedDict):
    """One record of the ``sample`` table, plus parser-added shortcuts.

    The read-only keys come from ``sample.json``; ``data`` and ``anns`` are
    not part of the file and are filled in by ``_NuScenesDB.__init__``.
    """

    # Key under which ``_NuScenesDB.get("sample", ...)`` finds the record.
    token: ReadOnly[str]
    timestamp: ReadOnly[int]
    prev: ReadOnly[str]
    # Token of the following sample. ``NuScenes3D.__init__`` follows this
    # link until it reads a falsy value, which ends the walk.
    next: ReadOnly[str]
    scene_token: ReadOnly[str]
    # ``data`` and ``anns`` are populated by ``_NuScenesDB.__init__`` and
    # remain writable so the parser can assign and mutate them.
    # ``data`` maps channel name -> token of the key-frame ``sample_data``
    # record; ``anns`` lists the tokens of the sample's annotations.
    data: dict[str, str]
    anns: list[str]


class _SampleData(TypedDict):
    """One record of the ``sample_data`` table, plus parser-added fields.

    The read-only keys come from ``sample_data.json``; ``channel`` and
    ``sensor_modality`` are joined in by ``_NuScenesDB.__init__`` through
    ``calibrated_sensor -> sensor``.
    """

    # Key under which ``_NuScenesDB.get("sample_data", ...)`` finds the record.
    token: ReadOnly[str]
    # Token of the ``sample`` this sensor reading is attached to.
    sample_token: ReadOnly[str]
    # Token of the ``ego_pose`` record for this reading.
    ego_pose_token: ReadOnly[str]
    # Token of the ``calibrated_sensor`` record for this reading.
    calibrated_sensor_token: ReadOnly[str]
    timestamp: ReadOnly[int]
    fileformat: ReadOnly[str]
    # Only records with a truthy ``is_key_frame`` are registered in the
    # owning sample's ``data`` shortcut.
    is_key_frame: ReadOnly[bool]
    height: ReadOnly[int]
    width: ReadOnly[int]
    # Path of the sensor file; ``NuScenes3D`` resolves it against its
    # ``root`` directory.
    filename: ReadOnly[str]
    prev: ReadOnly[str]
    next: ReadOnly[str]
    # ``channel``/``sensor_modality`` are populated by ``_NuScenesDB.__init__``.
    channel: str
    sensor_modality: str


class _SampleAnnotation(TypedDict):
    """One record of the ``sample_annotation`` table, plus ``category_name``.

    The read-only keys come from ``sample_annotation.json``;
    ``category_name`` is joined in by ``_NuScenesDB.__init__`` through
    ``instance -> category``.
    """

    # Key under which ``_NuScenesDB.get("sample_annotation", ...)`` finds the
    # record; also the value appended to the owning sample's ``anns`` list.
    token: ReadOnly[str]
    # Token of the ``sample`` the annotation belongs to.
    sample_token: ReadOnly[str]
    # Token of the ``instance`` record of the annotated object.
    instance_token: ReadOnly[str]
    visibility_token: ReadOnly[str]
    attribute_tokens: ReadOnly[list[str]]
    translation: ReadOnly[list[float]]
    size: ReadOnly[list[float]]
    rotation: ReadOnly[list[float]]
    prev: ReadOnly[str]
    next: ReadOnly[str]
    num_lidar_pts: ReadOnly[int]
    num_radar_pts: ReadOnly[int]
    # ``category_name`` is populated by ``_NuScenesDB.__init__``.
    category_name: str


class _NuScenesDB:
    """Minimal JSON-table parser for nuScenes.

    Replaces ``nuscenes.nuscenes.NuScenes`` from the nuscenes-devkit. Loads
    the tables required for 3D object detection and builds the same reverse
    indexes and decorations as the devkit:

    - ``sample[i]["data"]``: ``{channel -> sample_data token}`` (key-frames
      only).
    - ``sample[i]["anns"]``: list of annotation tokens belonging to the
      sample.
    - ``sample_annotation[i]["category_name"]``: fine-grained category
      string, joined through ``instance -> category``.
    - ``sample_data[i]["channel"]`` / ``["sensor_modality"]``: joined through
      ``calibrated_sensor -> sensor``.

    Records are returned by reference: :meth:`get` hands out the very dicts
    stored in the table lists, decorations included.
    """

    def __init__(self, dataroot: str | os.PathLike[str], version: str) -> None:
        """Load every table below ``dataroot / version`` and index it.

        Args:
            dataroot: Root directory of the dataset. It must contain a
                sub-directory named ``version`` with one ``<table>.json``
                file per loaded table.
            version: Name of that sub-directory.

        Raises:
            FileNotFoundError: If ``dataroot / version`` is not a directory,
                or if one of the table files cannot be opened.
            KeyError: If a record references a token that is missing from
                the table it points into.
        """
        self.dataroot = Path(dataroot)
        self.version = version
        table_root = self.dataroot / version
        if not table_root.is_dir():
            msg = f"nuScenes tables not found at {str(table_root)!r}."
            raise FileNotFoundError(msg)

        def _load(name: str) -> Any:
            # JSON text is UTF-8; pin the codec so that parsing does not
            # depend on the platform's locale encoding.
            with (table_root / f"{name}.json").open(encoding="utf-8") as f:
                return json.load(f)

        self.category: list[_Category] = _load("category")
        self.instance: list[_Instance] = _load("instance")
        self.sensor: list[_Sensor] = _load("sensor")
        self.calibrated_sensor: list[_CalibratedSensor] = _load("calibrated_sensor")
        self.ego_pose: list[_EgoPose] = _load("ego_pose")
        self.scene: list[_Scene] = _load("scene")
        self.sample: list[_Sample] = _load("sample")
        self.sample_data: list[_SampleData] = _load("sample_data")
        self.sample_annotation: list[_SampleAnnotation] = _load("sample_annotation")

        # Token -> table index, per table. Must exist before the first
        # ``self.get`` call below.
        self._token2ind: dict[str, dict[str, int]] = {
            name: {rec["token"]: i for i, rec in enumerate(getattr(self, name))}
            for name in _TABLE_NAMES
        }

        # Every sample starts with empty shortcuts so that samples without
        # key-frame data or annotations still expose both keys.
        for sample in self.sample:
            sample["data"] = {}
            sample["anns"] = []

        # One pass over the annotations: attach `category_name` and register
        # the annotation with its sample (table order is preserved).
        for ann in self.sample_annotation:
            inst = self.get("instance", ann["instance_token"])
            ann["category_name"] = self.get("category", inst["category_token"])["name"]
            self.get("sample", ann["sample_token"])["anns"].append(ann["token"])

        # Decorate every sample_data with `channel`/`sensor_modality`; only
        # key frames are exposed through the owning sample's `data` shortcut.
        for sd in self.sample_data:
            cs = self.get("calibrated_sensor", sd["calibrated_sensor_token"])
            sensor = self.get("sensor", cs["sensor_token"])
            channel = sensor["channel"]
            sd["channel"] = channel
            sd["sensor_modality"] = sensor["modality"]
            if sd["is_key_frame"]:
                self.get("sample", sd["sample_token"])["data"][channel] = sd["token"]

    @overload
    def get(self, table_name: Literal["category"], token: str) -> _Category: ...
    @overload
    def get(self, table_name: Literal["instance"], token: str) -> _Instance: ...
    @overload
    def get(self, table_name: Literal["sensor"], token: str) -> _Sensor: ...
    @overload
    def get(
        self, table_name: Literal["calibrated_sensor"], token: str
    ) -> _CalibratedSensor: ...
    @overload
    def get(self, table_name: Literal["ego_pose"], token: str) -> _EgoPose: ...
    @overload
    def get(self, table_name: Literal["scene"], token: str) -> _Scene: ...
    @overload
    def get(self, table_name: Literal["sample"], token: str) -> _Sample: ...
    @overload
    def get(self, table_name: Literal["sample_data"], token: str) -> _SampleData: ...
    @overload
    def get(
        self, table_name: Literal["sample_annotation"], token: str
    ) -> _SampleAnnotation: ...
    def get(self, table_name: str, token: str) -> Any:
        """Return the record with the given token from ``table_name``.

        Args:
            table_name: Name of a loaded table (one of ``_TABLE_NAMES``).
            token: Value of the ``"token"`` key of the wanted record.

        Returns:
            The stored record itself, not a copy.

        Raises:
            KeyError: If ``token`` does not occur in the table.
        """
        return getattr(self, table_name)[self._token2ind[table_name][token]]


[docs] class NuScenes3D(Dataset[tuple[FusionInputs, SampleTargets]]): """`nuScenes <https://www.nuscenes.org/>`_ 3D object detection dataset. Returns samples in the **lidar frame** with annotations as :class:`~vision3d.tensors.BoundingBoxes3D` in ``XYZLWHY`` format. Multi-camera images, intrinsics, and lidar-to-camera extrinsics are returned for all 6 cameras. Args: root (str or pathlib.Path): Root directory of the nuScenes dataset. version (str): Dataset version. Default: ``"v1.0-mini"``. split (str): One of ``"train"`` or ``"val"``. Default: ``"train"``. transforms (Callable, optional): A function/transform that takes input sample and its target as entry and returns a transformed version. download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. Only the publicly available ``v1.0-mini`` split is supported. Other versions require manual download from `nuscenes.org <https://www.nuscenes.org/>`_. """ # Camera ordering for consistent multi-camera tensor layout camera_names: ClassVar[tuple[str, ...]] = ( "CAM_FRONT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT", "CAM_BACK", "CAM_BACK_LEFT", "CAM_FRONT_LEFT", ) # Row-major layout matching the physical rig. Indices reference # ``camera_names`` positions. Rows may have different lengths. camera_grid: ClassVar[tuple[tuple[int, ...], ...] 
| None] = ( (5, 0, 1), # CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT (2, 3, 4), # CAM_BACK_RIGHT, CAM_BACK, CAM_BACK_LEFT ) classes: ClassVar[tuple[str, ...]] = _DETECTION_NAMES class_to_idx: ClassVar[dict[str, int]] = {name: i for i, name in enumerate(classes)} data_url: ClassVar[str] = "https://www.nuscenes.org/data/" mini_archive: ClassVar[str] = "v1.0-mini.tgz" def __init__( self, root: str | os.PathLike[str], version: str = "v1.0-mini", split: str = "train", transforms: Any | None = None, download: bool = False, ) -> None: self.root = Path(root) self.version = version self.split = split self.transforms = transforms if download: self.download() if not self._check_exists(): raise RuntimeError( f"Dataset not found at {str(self.root)!r}. " f"You may use download=True to download the v1.0-mini split. " f"Other versions require manual download from " f"https://www.nuscenes.org." ) self._nusc = _NuScenesDB(dataroot=self.root, version=version) # Collect sample tokens for the requested split split_scenes = _get_split_scenes(version, split) self._sample_tokens: list[str] = [] for scene in self._nusc.scene: if scene["name"] in split_scenes: token = scene["first_sample_token"] while token: self._sample_tokens.append(token) sample = self._nusc.get("sample", token) token = sample["next"] def __len__(self) -> int: """Return the number of samples.""" return len(self._sample_tokens) def _check_exists(self) -> bool: return (self.root / self.version).is_dir()
def download(self) -> None:
    """Fetch and unpack the nuScenes mini archive into ``root``.

    Returns immediately when the ``root / version`` directory is already
    present. Only the publicly available ``v1.0-mini`` split can be fetched
    this way; other versions require manual download from
    `nuscenes.org <https://www.nuscenes.org/>`_.

    Raises:
        RuntimeError: If ``version`` is not ``"v1.0-mini"``.
    """
    # Nothing to do when the tables are already on disk.
    if self._check_exists():
        return

    # Only the mini archive is publicly downloadable.
    if self.version != "v1.0-mini":
        raise RuntimeError(
            f"Automatic download is only supported for v1.0-mini. Got "
            f"version={self.version!r}. Other versions require manual "
            f"download from https://www.nuscenes.org."
        )

    archive_name = self.mini_archive
    archive_url = f"{self.data_url}{archive_name}"
    self.root.mkdir(parents=True, exist_ok=True)
    # The archive is stored in, and extracted into, ``root``.
    download_and_extract_archive(
        url=archive_url,
        download_root=str(self.root),
        filename=archive_name,
    )
@override
def __getitem__(self, index: int) -> tuple[FusionInputs, SampleTargets]:
    """Assemble the sensor inputs and detection targets of one sample.

    Args:
        index (int): Index.

    Returns:
        Tuple of ``(inputs, targets)``.

        **inputs** is a dict with keys:

        - ``"points"``: :class:`PointCloud3D` in lidar frame ``[N, 5]``
          (x, y, z, intensity, ring_index).
        - ``"images"``: :class:`CameraImages` ``[6, 3, H, W]``.
        - ``"extrinsics"``: :class:`CameraExtrinsics` ``[6, 4, 4]``
          (lidar-to-camera).
        - ``"intrinsics"``: :class:`CameraIntrinsics` ``[6, 3, 3]``.

        **targets** is a dict with keys:

        - ``"boxes"``: :class:`BoundingBoxes3D` in lidar frame, format
          ``XYZLWHY``.
        - ``"labels"``: :class:`~torch.Tensor` of class indices.
    """
    db = self._nusc
    sample = db.get("sample", self._sample_tokens[index])

    # Lidar sweep and the two records that place it in the global frame.
    lidar_record = db.get("sample_data", sample["data"]["LIDAR_TOP"])
    points = self._load_lidar(lidar_record)
    ego_at_lidar = db.get("ego_pose", lidar_record["ego_pose_token"])
    lidar_sensor = db.get(
        "calibrated_sensor", lidar_record["calibrated_sensor_token"]
    )

    # lidar -> global = (ego pose) @ (sensor calibration)
    ego_to_global = _make_transform(
        ego_at_lidar["translation"], ego_at_lidar["rotation"]
    )
    lidar_to_ego = _make_transform(
        lidar_sensor["translation"], lidar_sensor["rotation"]
    )
    lidar_to_global = ego_to_global @ lidar_to_ego

    frames = []
    camera_matrices = []
    lidar_to_cameras = []
    for cam_name in self.camera_names:
        cam_record = db.get("sample_data", sample["data"][cam_name])
        cam_sensor = db.get(
            "calibrated_sensor", cam_record["calibrated_sensor_token"]
        )
        ego_at_cam = db.get("ego_pose", cam_record["ego_pose_token"])

        frames.append(self._load_image(cam_record))
        camera_matrices.append(
            torch.tensor(cam_sensor["camera_intrinsic"], dtype=torch.float32)
        )

        # Each camera frame carries its own ego pose record, so the
        # lidar-to-camera transform is chained through the global frame.
        cam_ego_to_global = _make_transform(
            ego_at_cam["translation"], ego_at_cam["rotation"]
        )
        cam_to_ego = _make_transform(
            cam_sensor["translation"], cam_sensor["rotation"]
        )
        cam_to_global = cam_ego_to_global @ cam_to_ego
        lidar_to_cameras.append(torch.linalg.inv(cam_to_global) @ lidar_to_global)

    # The image size is taken from the first camera frame.
    first_frame = frames[0]
    height, width = first_frame.shape[-2], first_frame.shape[-1]
    inputs: FusionInputs = {
        "points": PointCloud3D(points),
        "images": CameraImages(torch.stack(frames)),
        "extrinsics": CameraExtrinsics(torch.stack(lidar_to_cameras)),
        "intrinsics": CameraIntrinsics(
            torch.stack(camera_matrices),
            image_size=(height, width),
        ),
    }

    # Annotations are stored in the global frame and converted to lidar frame.
    targets = self._load_annotations(sample, lidar_to_global)

    if self.transforms is None:
        return inputs, targets
    inputs, targets = self.transforms(inputs, targets)
    return inputs, targets

def _load_lidar(self, lidar_data: _SampleData) -> Tensor:
    """Read a lidar file as float32 values reshaped to ``[N, 5]``."""
    raw = np.fromfile(self.root / lidar_data["filename"], dtype=np.float32)
    return torch.from_numpy(raw.reshape(-1, 5))

def _load_image(self, cam_data: _SampleData) -> Tensor:
    """Decode a camera image as RGB and scale it to floats in ``[0, 1]``."""
    image_path = str(self.root / cam_data["filename"])
    pixels = decode_image(image_path, mode=ImageReadMode.RGB)  # [3, H, W] uint8
    return pixels.float() / 255.0

def _load_annotations(
    self, sample: _Sample, lidar_to_global: Tensor
) -> SampleTargets:
    """Load annotations and convert from global to lidar frame.

    Annotations whose category has no detection class are skipped.

    Args:
        sample: Sample record whose ``"anns"`` tokens are loaded.
        lidar_to_global: ``[4, 4]`` lidar-to-global transform; its inverse
            is applied to the box centers and headings.

    Returns:
        Dict with ``"boxes"`` (:class:`BoundingBoxes3D`, XYZLWHY format),
        ``"labels"`` (int tensor).
    """
    global_to_lidar = torch.linalg.inv(lidar_to_global)
    rotation_part = global_to_lidar[:3, :3].numpy()

    label_ids: list[int] = []
    boxes: list[list[float]] = []
    for ann_token in sample["anns"]:
        ann = self._nusc.get("sample_annotation", ann_token)
        det_name = _category_to_detection_name(ann["category_name"])
        if det_name is None:
            continue
        label_ids.append(self.class_to_idx[det_name])

        # Center: homogeneous global point -> lidar frame.
        center_h = torch.tensor([*ann["translation"], 1.0], dtype=torch.float32)
        cx, cy, cz = (global_to_lidar @ center_h)[:3].tolist()

        # Dimensions: nuScenes stores (w, l, h), we want (l, w, h).
        width, length, height = ann["size"]

        # Rotation: quaternion -> yaw in the lidar frame.
        yaw = _quaternion_to_yaw(ann["rotation"], rotation_part)

        boxes.append([cx, cy, cz, length, width, height, yaw])

    if boxes:
        box_data = torch.tensor(boxes, dtype=torch.float32)
        label_data = torch.tensor(label_ids, dtype=torch.int64)
    else:
        # Keep the 7-column layout even when nothing was annotated.
        box_data = torch.zeros(0, 7)
        label_data = torch.zeros(0, dtype=torch.int64)

    return {
        "boxes": BoundingBoxes3D(box_data, format=BoundingBox3DFormat.XYZLWHY),
        "labels": label_data,
    }
def _category_to_detection_name(category_name: str) -> str | None: """Map a fine-grained nuScenes category to a detection class. Returns: The detection class name, or ``None`` for categories that are not part of the detection task (e.g. ``human.pedestrian.personal_mobility``, ``vehicle.emergency.*``). """ return _CATEGORY_TO_DETECTION.get(category_name) def _get_split_scenes(version: str, split: str) -> frozenset[str]: """Return the set of scene names for ``(version, split)``. Raises: ValueError: If the version/split combination is not supported. """ if version not in _VERSION_SPLITS: msg = f"Unsupported version: {version!r}" raise ValueError(msg) split_map = _VERSION_SPLITS[version] if split not in split_map: valid = ", ".join(repr(k) for k in split_map) msg = f"Unsupported split {split!r} for version {version!r}. Valid combinations are: {valid}" raise ValueError(msg) return frozenset(split_map[split]) def _quaternion_to_rotation_matrix(rotation_wxyz: list[float]) -> np.ndarray: """Convert a unit quaternion ``(w, x, y, z)`` to a 3x3 rotation matrix. Returns: ``[3, 3]`` rotation matrix. """ w, x, y, z = rotation_wxyz return np.array( [ [1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y)], [2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x)], [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)], ], dtype=np.float64, ) def _make_transform(translation: list[float], rotation_wxyz: list[float]) -> Tensor: """Build a 4x4 transform from translation + quaternion ``(w, x, y, z)``. Returns: ``[4, 4]`` homogeneous transform matrix. """ rot = _quaternion_to_rotation_matrix(rotation_wxyz) T = torch.eye(4, dtype=torch.float32) T[:3, :3] = torch.from_numpy(rot.astype(np.float32)) T[:3, 3] = torch.tensor(translation, dtype=torch.float32) return T def _quaternion_to_yaw( rotation_wxyz: list[float], global_to_lidar_rot: np.ndarray ) -> float: """Convert a global-frame quaternion to yaw angle in lidar frame. 
Args: rotation_wxyz: Quaternion in wxyz format (global frame). global_to_lidar_rot: ``[3, 3]`` rotation from global to lidar. Returns: Yaw angle in radians. """ rot = _quaternion_to_rotation_matrix(rotation_wxyz) # Box's local +X axis expressed in the lidar frame. forward_lidar = global_to_lidar_rot @ rot @ np.array([1.0, 0.0, 0.0]) return float(np.arctan2(forward_lidar[1], forward_lidar[0]))