tests.data_loader_test
"""Tests for the FiftyOne -> torch -> Hugging Face dataset conversion helpers."""

import random

import fiftyone as fo
import pytest
import torch
from datasets import Dataset, Split
from fiftyone.utils.huggingface import load_from_hub
from torch.utils.data import DataLoader

from config.config import ACCEPTED_SPLITS
from utils.data_loader import FiftyOneTorchDatasetCOCO, TorchToHFDatasetCOCO
from utils.dataset_loader import get_split

# Name of the sample field that holds the ground-truth detections
fisheye8k_gt_field = "detections"
# Number of samples requested from the hub for each test dataset
max_samples = 50
# Batch size used by every DataLoader built in this module
batch_size = 4


@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub and tag each sample with a split."""
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Ensure that all splits are represented (normally Data Engine takes care of that)
        # NOTE(review): random.choice makes every split likely, not guaranteed
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = [random.choice(ACCEPTED_SPLITS)]
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit;
    # any failure above falls back to the dataset stored under the same name
    except:
        dataset = fo.load_dataset(dataset_name)
    return dataset


@pytest.fixture
def dataset_v51_no_splits_no_detections():
    """Fixture to load a FiftyOne dataset from the hub, stripped of tags and detections."""
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest_raw"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Remove all tags
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = []

        # Remove detection field
        dataset.delete_sample_field(fisheye8k_gt_field)
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit
    except:
        dataset = fo.load_dataset(dataset_name)
    return dataset


def test_conversions_on_raw_dataset(dataset_v51_no_splits_no_detections):
    """Test that conversions work on a V51 dataset without labels or splits."""
    torch_dataset = FiftyOneTorchDatasetCOCO(
        dataset_v51_no_splits_no_detections, gt_field=None
    )
    hf_dataset_converter = TorchToHFDatasetCOCO(torch_dataset)
    hf_dataset = hf_dataset_converter.convert()

    # Only checks that both conversion steps complete and return an object
    assert torch_dataset is not None
    assert hf_dataset is not None


def test_dataset_v51(dataset_v51):
    """Test that the dataset fixture yields an object."""
    assert dataset_v51 is not None


# Tests for torch dataset
@pytest.fixture
def torch_dataset(dataset_v51):
    """Fixture to create a FiftyOneTorchDatasetCOCO instance."""
    return FiftyOneTorchDatasetCOCO(dataset_v51, gt_field=fisheye8k_gt_field)


def test_torch_dataset_length(torch_dataset):
    """Test the length of the torch dataset."""
    assert len(torch_dataset) == max_samples


@pytest.mark.parametrize("index", [0, 1, 2])
def test_torch_dataset_getitem(torch_dataset, index):
    """Test getting an item from the torch dataset."""
    img, target = torch_dataset[index]
    assert isinstance(img, torch.Tensor)
    assert "bbox" in target
    assert "category_id" in target
    assert "image_id" in target
    assert "area" in target
    assert "iscrowd" in target


def test_torch_dataset_getitem_invalid_index(torch_dataset):
    """Test getting an item with an invalid index from the torch dataset."""
    # Far beyond the max_samples items the dataset is expected to hold
    test_index = max_samples * 10
    with pytest.raises(IndexError):
        torch_dataset[test_index]


def test_torch_dataset_getitems(torch_dataset):
    """Test getting multiple items from the torch dataset."""
    samples = torch_dataset.__getitems__([0, 1, 2])
    assert len(samples) == 3
    for img, target in samples:
        assert isinstance(img, torch.Tensor)
        assert "bbox" in target


def test_torch_dataset_getitems_invalid_indices(torch_dataset):
    """Test getting multiple items with invalid indices from the torch dataset."""
    test_index_1 = max_samples * 10
    test_index_2 = test_index_1 + 1
    with pytest.raises(IndexError):
        torch_dataset.__getitems__([test_index_1, test_index_2])


def test_torch_dataset_get_classes(torch_dataset):
    """Test getting classes from the torch dataset."""
    classes = torch_dataset.get_classes()
    assert isinstance(classes, list)


def test_torch_dataset_get_splits(torch_dataset):
    """Test getting splits from the torch dataset."""
    splits = torch_dataset.get_splits()
    # Test return type is set
    assert isinstance(splits, set), "get_splits() should return a set"

    # Empty splits are allowed
    if not splits:
        return

    # If splits exist, they must be subset of ACCEPTED_SPLITS
    assert splits.issubset(
        set(ACCEPTED_SPLITS)
    ), f"Invalid splits found: {splits} All splits must be one of {ACCEPTED_SPLITS}"


# Tests for torch dataloader
@pytest.fixture
def dataloader(torch_dataset):
    """Fixture to create a DataLoader instance."""
    return DataLoader(
        torch_dataset,
        batch_size=batch_size,
        # Turn a list of (img, target) pairs into [imgs, targets]
        collate_fn=lambda batch: list(zip(*batch)),
        shuffle=True,
    )


def test_dataloader_length(dataloader, torch_dataset):
    """Test the length of the dataloader."""
    # NOTE(review): "+ 3" is batch_size - 1 hard-coded; only a ceiling
    # division while batch_size == 4
    assert len(dataloader) == (len(torch_dataset) + 3) // batch_size


def test_dataloader_batch(dataloader, torch_dataset):
    """Test getting a batch from the dataloader."""
    total_samples = len(torch_dataset)
    samples_processed = 0

    for batch in dataloader:
        imgs, targets = batch
        current_batch_size = len(imgs)

        # For the last batch, size might be smaller
        if samples_processed + batch_size > total_samples:
            expected_size = total_samples - samples_processed
            assert (
                current_batch_size == expected_size
            ), f"Last batch size should be {expected_size} but got {current_batch_size}"
        else:
            assert (
                current_batch_size == batch_size
            ), f"Batch size should be {batch_size} but got {current_batch_size}"

        assert len(targets) == current_batch_size

        for img, target in zip(imgs, targets):
            assert isinstance(img, torch.Tensor)
            assert "bbox" in target
            assert "category_id" in target
            assert "image_id" in target
            assert "area" in target
            assert "iscrowd" in target

        samples_processed += current_batch_size

    # Verify we processed all samples
    assert (
        samples_processed == total_samples
    ), f"Processed {samples_processed} samples but dataset has {total_samples}"


# Tests for HF dataset
@pytest.fixture
def converter_torch_hf(torch_dataset):
    """Fixture to create a TorchToHFDatasetCOCO instance."""
    return TorchToHFDatasetCOCO(torch_dataset)


def test_hf_dataset_conversion(converter_torch_hf):
    """Test converting the torch dataset to HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    # Get splits from dataset
    splits = set(hf_dataset.keys())

    # Empty splits are allowed
    if not splits:
        return

    ACCEPTED_SPLITS_HF = {Split.TRAIN, Split.TEST, Split.VALIDATION}

    # If splits exist, they must be subset of ACCEPTED_SPLITS_HF
    assert splits.issubset(
        ACCEPTED_SPLITS_HF
    ), f"Invalid splits found: {splits} All splits must be one of {ACCEPTED_SPLITS_HF}"

    # Only test instance type for valid splits
    for split in splits:
        assert isinstance(
            hf_dataset[split], Dataset
        ), f"{split} split should be a Dataset"


def test_hf_dataset_sample(converter_torch_hf):
    """Test getting a sample from the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    # NOTE(review): looks up ACCEPTED_SPLITS names in a dataset that appears
    # to be keyed by datasets.Split; confirm the validation split is matched
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            sample = hf_dataset[split][0]
            assert "image_path" in sample
            assert "objects" in sample
            assert "split" in sample


def test_hf_dataset_dataloader(converter_torch_hf):
    """Test creating a DataLoader from the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            dataloader = DataLoader(
                hf_dataset[split],
                batch_size=batch_size,
                # Regroup the batch into parallel lists of paths, objects and splits
                collate_fn=lambda batch: (
                    [item["image_path"] for item in batch],
                    [item["objects"] for item in batch],
                    [item["split"] for item in batch],
                ),
            )
            for batch in dataloader:
                images, targets, splits = batch
                # NOTE(review): the loop variable below rebinds the outer `split`
                for img, target, split in zip(images, targets, splits):
                    assert isinstance(img, str)
                    assert isinstance(target["bbox"], list)
                    assert isinstance(target["category_id"], list)
                    assert isinstance(split, str)


def test_hf_dataset_with_format(converter_torch_hf):
    """Test setting the format of the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            hf_dataset[split] = hf_dataset[split].with_format("torch")
            sample = hf_dataset[split][0]
            assert isinstance(sample["image_path"], str)  # Includes filepath
            assert isinstance(sample["objects"]["bbox"], torch.Tensor)
            assert isinstance(sample["objects"]["category_id"], torch.Tensor)


# Tests for incomplete datasets
@pytest.fixture
def empty_dataset():
    """Fixture to create an empty FiftyOne dataset."""
    try:
        dataset = fo.Dataset(name="empty_dataset")
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit
    except:
        dataset = fo.load_dataset("empty_dataset")
    return dataset


@pytest.fixture
def no_annotations_dataset():
    """Fixture to create a FiftyOne dataset with no annotations."""
    try:
        dataset = fo.Dataset(name="no_annotations_dataset")
        dataset.add_sample(fo.Sample(filepath="image1.jpg"))
        dataset.add_sample(fo.Sample(filepath="image2.jpg"))
    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit
    except:
        dataset = fo.load_dataset("no_annotations_dataset")

    return dataset


def test_empty_dataset(empty_dataset):
    """Test creating a torch dataset from an empty FiftyOne dataset."""
    dataset = FiftyOneTorchDatasetCOCO(empty_dataset)
    assert len(dataset) == 0


def test_no_annotations_dataset(no_annotations_dataset):
    """Test creating a torch dataset from a FiftyOne dataset with no annotations."""
    dataset = FiftyOneTorchDatasetCOCO(no_annotations_dataset)
    assert len(dataset) == 2


def test_detection_preservation(dataset_v51, torch_dataset, converter_torch_hf):
    """Test that detections are preserved when converting between dataset formats."""

    # Get a sample from FiftyOne dataset
    v51_sample = dataset_v51.first()
    v51_detections = v51_sample[fisheye8k_gt_field].detections
    v51_det_count = len(v51_detections)

    # Get corresponding torch sample
    # NOTE(review): assumes index 0 of the torch dataset is dataset_v51.first()
    torch_sample = torch_dataset[0]
    torch_bboxes = torch_sample[1]["bbox"]
    torch_categories = torch_sample[1]["category_id"]

    # Build category mapping
    categories = dataset_v51.distinct(f"{fisheye8k_gt_field}.detections.label")
    category_map = {label: idx for idx, label in enumerate(categories)}

    # Verify torch detection count matches
    assert len(torch_bboxes) == v51_det_count
    assert len(torch_categories) == v51_det_count

    # Convert to HF dataset and get sample
    hf_dataset = converter_torch_hf.convert()
    split = get_split(v51_sample)
    split_mapping = {"train": Split.TRAIN, "val": Split.VALIDATION, "test": Split.TEST}
    # NOTE(review): assumes the first sample of its HF split is v51_sample
    hf_sample = hf_dataset[split_mapping[split]][0]

    # Verify HF detection count matches
    assert len(hf_sample["objects"]["bbox"]) == v51_det_count
    assert len(hf_sample["objects"]["category_id"]) == v51_det_count

    img_width = v51_sample.metadata.width
    img_height = v51_sample.metadata.height

    # Verify detection properties match between V51 and torch
    for i, v51_det in enumerate(v51_detections):
        # Check bounding box format conversion
        v51_bbox = v51_det.bounding_box
        torch_bbox = torch_sample[1]["bbox"][i].tolist()

        # Verify coordinates with tolerance
        assert (
            abs(v51_bbox[0] - torch_bbox[0] / img_width) < 0.01
        )  # width normalization
        assert (
            abs(v51_bbox[1] - torch_bbox[1] / img_height) < 0.01
        )  # height normalization
        assert abs(v51_bbox[2] - torch_bbox[2] / img_width) < 0.01
        assert abs(v51_bbox[3] - torch_bbox[3] / img_height) < 0.01

        # Verify category mapping for all classes
        expected_category = category_map[v51_det.label]
        assert (
            torch_categories[i] == expected_category
        ), f"Mismatched category for {v51_det.label}"
        assert hf_sample["objects"]["category_id"][i] == expected_category
@pytest.fixture
def dataset_v51():
    """Provide the split-tagged ``fisheye8k_pytest`` FiftyOne dataset.

    The fixture first tries to fetch ``max_samples`` samples of
    ``Voxel51/fisheye8k`` from the hub and tags every sample with one randomly
    chosen entry of ``ACCEPTED_SPLITS``. If any of that fails (presumably
    because the dataset name is already taken by an earlier run), the dataset
    stored under the same name is loaded instead.

    Returns:
        The FiftyOne dataset named ``fisheye8k_pytest``.
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Give every sample a split tag (normally Data Engine takes care of that)
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = [random.choice(ACCEPTED_SPLITS)]
    except Exception:
        # Best-effort fallback to the already stored dataset. Catching
        # Exception instead of using a bare ``except`` lets KeyboardInterrupt
        # and SystemExit propagate rather than being silently swallowed.
        dataset = fo.load_dataset(dataset_name)
    return dataset
Fixture to load a FiftyOne dataset from the hub.
@pytest.fixture
def dataset_v51_no_splits_no_detections():
    """Provide the ``fisheye8k_pytest_raw`` dataset without tags or detections.

    The fixture first tries to fetch ``max_samples`` samples of
    ``Voxel51/fisheye8k`` from the hub, clears the tags of every sample and
    deletes the ground-truth detection field. If any of that fails (presumably
    because the dataset name is already taken by an earlier run), the dataset
    stored under the same name is loaded instead.

    Returns:
        The FiftyOne dataset named ``fisheye8k_pytest_raw``.
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest_raw"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Remove all tags so that no sample belongs to a split
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = []

        # Remove detection field
        dataset.delete_sample_field(fisheye8k_gt_field)
    except Exception:
        # Best-effort fallback to the already stored dataset. Catching
        # Exception instead of using a bare ``except`` lets KeyboardInterrupt
        # and SystemExit propagate rather than being silently swallowed.
        dataset = fo.load_dataset(dataset_name)
    return dataset
Fixture to load a FiftyOne dataset from the hub with all sample tags cleared and the detections field removed.
def test_conversions_on_raw_dataset(dataset_v51_no_splits_no_detections):
    """Check that a dataset without split tags or detections can still be converted."""
    raw_torch_dataset = FiftyOneTorchDatasetCOCO(
        dataset_v51_no_splits_no_detections, gt_field=None
    )
    raw_hf_dataset = TorchToHFDatasetCOCO(raw_torch_dataset).convert()

    # Reaching this point means neither conversion step raised
    assert raw_torch_dataset is not None
    assert raw_hf_dataset is not None
Test that conversions work on a V51 dataset without labels or splits.
@pytest.fixture
def torch_dataset(dataset_v51):
    """Wrap the tagged FiftyOne fixture dataset in a FiftyOneTorchDatasetCOCO."""
    wrapped = FiftyOneTorchDatasetCOCO(dataset_v51, gt_field=fisheye8k_gt_field)
    return wrapped
Fixture to create a FiftyOneTorchDatasetCOCO instance.
def test_torch_dataset_length(torch_dataset):
    """The torch dataset must report exactly ``max_samples`` items."""
    item_count = len(torch_dataset)
    assert item_count == max_samples
Test the length of the torch dataset.
@pytest.mark.parametrize("index", [0, 1, 2])
def test_torch_dataset_getitem(torch_dataset, index):
    """Indexing yields an image tensor and a target holding the expected keys."""
    image, annotation = torch_dataset[index]
    assert isinstance(image, torch.Tensor)
    # Keys every target dict is required to carry
    for key in ("bbox", "category_id", "image_id", "area", "iscrowd"):
        assert key in annotation
Test getting an item from the torch dataset.
def test_torch_dataset_getitem_invalid_index(torch_dataset):
    """Indexing far past the end of the torch dataset must raise IndexError."""
    out_of_range = 10 * max_samples
    with pytest.raises(IndexError):
        torch_dataset[out_of_range]
Test getting an item with an invalid index from the torch dataset.
def test_torch_dataset_getitems(torch_dataset):
    """Batched access returns one (image, target) pair per requested index."""
    requested = [0, 1, 2]
    fetched = torch_dataset.__getitems__(requested)
    assert len(fetched) == 3
    for image, annotation in fetched:
        assert isinstance(image, torch.Tensor)
        assert "bbox" in annotation
Test getting multiple items from the torch dataset.
def test_torch_dataset_getitems_invalid_indices(torch_dataset):
    """Batched access with out-of-range indices must raise IndexError."""
    first_bad = 10 * max_samples
    bad_indices = [first_bad, first_bad + 1]
    with pytest.raises(IndexError):
        torch_dataset.__getitems__(bad_indices)
Test getting multiple items with invalid indices from the torch dataset.
def test_torch_dataset_get_classes(torch_dataset):
    """``get_classes()`` must return a list."""
    assert isinstance(torch_dataset.get_classes(), list)
Test getting classes from the torch dataset.
def test_torch_dataset_get_splits(torch_dataset):
    """``get_splits()`` returns a set containing only accepted split names."""
    found = torch_dataset.get_splits()
    assert isinstance(found, set), "get_splits() should return a set"

    # An empty set is acceptable; anything present must be an accepted split
    if found:
        allowed = set(ACCEPTED_SPLITS)
        assert (
            found <= allowed
        ), f"Invalid splits found: {found} All splits must be one of {ACCEPTED_SPLITS}"
Test getting splits from the torch dataset.
@pytest.fixture
def dataloader(torch_dataset):
    """Build a shuffling DataLoader over the torch dataset fixture."""

    def _transpose(batch):
        # Turn a list of (img, target) pairs into [imgs, targets]
        return list(zip(*batch))

    loader = DataLoader(
        torch_dataset,
        shuffle=True,
        batch_size=batch_size,
        collate_fn=_transpose,
    )
    return loader
Fixture to create a DataLoader instance.
def test_dataloader_length(dataloader, torch_dataset):
    """Test that the dataloader yields ceil(len(dataset) / batch_size) batches.

    The expected count is a ceiling division written in terms of
    ``batch_size`` rather than the previous hard-coded ``+ 3``, which was only
    correct while ``batch_size`` happened to be 4.
    """
    expected_batches = (len(torch_dataset) + batch_size - 1) // batch_size
    assert len(dataloader) == expected_batches
Test the length of the dataloader.
def test_dataloader_batch(dataloader, torch_dataset):
    """Iterate the whole dataloader and validate every batch it produces."""
    dataset_size = len(torch_dataset)
    seen = 0
    required_keys = ("bbox", "category_id", "image_id", "area", "iscrowd")

    for images, annotations in dataloader:
        n_in_batch = len(images)
        remaining = dataset_size - seen

        # Only the final batch may hold fewer than batch_size samples
        if remaining < batch_size:
            assert (
                n_in_batch == remaining
            ), f"Last batch size should be {remaining} but got {n_in_batch}"
        else:
            assert (
                n_in_batch == batch_size
            ), f"Batch size should be {batch_size} but got {n_in_batch}"

        assert len(annotations) == n_in_batch

        for image, annotation in zip(images, annotations):
            assert isinstance(image, torch.Tensor)
            for key in required_keys:
                assert key in annotation

        seen += n_in_batch

    # Every sample of the dataset must have been delivered
    assert (
        seen == dataset_size
    ), f"Processed {seen} samples but dataset has {dataset_size}"
Test getting a batch from the dataloader.
@pytest.fixture
def converter_torch_hf(torch_dataset):
    """Build a TorchToHFDatasetCOCO converter around the torch dataset fixture."""
    converter = TorchToHFDatasetCOCO(torch_dataset)
    return converter
Fixture to create a TorchToHFDatasetCOCO instance.
def test_hf_dataset_conversion(converter_torch_hf):
    """The converted dataset holds only HF splits, each a ``Dataset`` instance."""
    converted = converter_torch_hf.convert()
    split_keys = set(converted.keys())

    # A conversion that produced no splits is acceptable
    if split_keys:
        hf_splits = {Split.TRAIN, Split.TEST, Split.VALIDATION}

        assert (
            split_keys.issubset(hf_splits)
        ), f"Invalid splits found: {split_keys} All splits must be one of {hf_splits}"

        for key in split_keys:
            assert isinstance(
                converted[key], Dataset
            ), f"{key} split should be a Dataset"
Test converting the torch dataset to HF dataset.
def test_hf_dataset_sample(converter_torch_hf):
    """Test that the first sample of every converted split has the expected keys.

    The splits are taken from the converted dataset itself instead of probing
    it with the names in ``ACCEPTED_SPLITS``: a name that does not match a key
    of the converted dataset would be skipped silently, leaving that split
    untested.
    """
    hf_dataset = converter_torch_hf.convert()
    for split_name in hf_dataset.keys():
        sample = hf_dataset[split_name][0]
        assert "image_path" in sample
        assert "objects" in sample
        assert "split" in sample
Test getting a sample from the HF dataset.
def test_hf_dataset_dataloader(converter_torch_hf):
    """Test that every converted split can be batched through a DataLoader.

    The splits are taken from the converted dataset itself instead of probing
    it with the names in ``ACCEPTED_SPLITS``: a name that does not match a key
    of the converted dataset would be skipped silently, leaving that split
    untested.
    """
    hf_dataset = converter_torch_hf.convert()

    def _collate(batch):
        # Regroup the batch into parallel lists of paths, objects and splits
        return (
            [item["image_path"] for item in batch],
            [item["objects"] for item in batch],
            [item["split"] for item in batch],
        )

    for split_name in hf_dataset.keys():
        loader = DataLoader(
            hf_dataset[split_name],
            batch_size=batch_size,
            collate_fn=_collate,
        )
        for image_paths, targets, sample_splits in loader:
            # Distinct inner names so the outer split variable is not rebound
            for image_path, target, sample_split in zip(
                image_paths, targets, sample_splits
            ):
                assert isinstance(image_path, str)
                assert isinstance(target["bbox"], list)
                assert isinstance(target["category_id"], list)
                assert isinstance(sample_split, str)
Test creating a DataLoader from the HF dataset.
def test_hf_dataset_with_format(converter_torch_hf):
    """Test that every converted split yields tensors in ``torch`` format.

    The splits are taken from the converted dataset itself instead of probing
    it with the names in ``ACCEPTED_SPLITS``: a name that does not match a key
    of the converted dataset would be skipped silently, leaving that split
    untested.
    """
    hf_dataset = converter_torch_hf.convert()
    for split_name in hf_dataset.keys():
        # Use the formatted split directly; no need to write it back into
        # the mapping that is being iterated
        torch_split = hf_dataset[split_name].with_format("torch")
        sample = torch_split[0]
        assert isinstance(sample["image_path"], str)  # Includes filepath
        assert isinstance(sample["objects"]["bbox"], torch.Tensor)
        assert isinstance(sample["objects"]["category_id"], torch.Tensor)
Test setting the format of the HF dataset.
@pytest.fixture
def empty_dataset():
    """Provide the FiftyOne dataset named ``empty_dataset``.

    A new dataset with that name is created; if creation fails (presumably
    because the name is already taken by an earlier run), the dataset stored
    under that name is loaded instead.

    Returns:
        The FiftyOne dataset named ``empty_dataset``.
    """
    try:
        dataset = fo.Dataset(name="empty_dataset")
    except Exception:
        # Best-effort fallback. Catching Exception instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        dataset = fo.load_dataset("empty_dataset")
    return dataset
Fixture to create an empty FiftyOne dataset.
@pytest.fixture
def no_annotations_dataset():
    """Provide the FiftyOne dataset named ``no_annotations_dataset``.

    A new dataset with that name is created and filled with two samples that
    carry only a file path. If that fails (presumably because the name is
    already taken by an earlier run), the dataset stored under that name is
    loaded instead.

    Returns:
        The FiftyOne dataset named ``no_annotations_dataset``.
    """
    try:
        dataset = fo.Dataset(name="no_annotations_dataset")
        dataset.add_sample(fo.Sample(filepath="image1.jpg"))
        dataset.add_sample(fo.Sample(filepath="image2.jpg"))
    except Exception:
        # Best-effort fallback. Catching Exception instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        dataset = fo.load_dataset("no_annotations_dataset")

    return dataset
Fixture to create a FiftyOne dataset with no annotations.
def test_empty_dataset(empty_dataset):
    """A torch dataset built from an empty FiftyOne dataset has length zero."""
    wrapped = FiftyOneTorchDatasetCOCO(empty_dataset)
    item_count = len(wrapped)
    assert item_count == 0
Test creating a torch dataset from an empty FiftyOne dataset.
def test_no_annotations_dataset(no_annotations_dataset):
    """A torch dataset built from two unannotated samples has length two."""
    wrapped = FiftyOneTorchDatasetCOCO(no_annotations_dataset)
    item_count = len(wrapped)
    assert item_count == 2
Test creating a torch dataset from a FiftyOne dataset with no annotations.
def test_detection_preservation(dataset_v51, torch_dataset, converter_torch_hf):
    """Compare one sample's detections across the FiftyOne, torch and HF datasets."""

    # Reference: the first FiftyOne sample and its ground-truth detections
    reference = dataset_v51.first()
    reference_dets = reference[fisheye8k_gt_field].detections
    n_dets = len(reference_dets)

    # The same position in the torch dataset
    _, torch_target = torch_dataset[0]
    torch_boxes = torch_target["bbox"]
    torch_labels = torch_target["category_id"]

    # Label -> index mapping taken from the distinct labels of the dataset
    label_field = f"{fisheye8k_gt_field}.detections.label"
    label_to_id = {
        name: position
        for position, name in enumerate(dataset_v51.distinct(label_field))
    }

    # The torch target must hold one entry per detection
    assert len(torch_boxes) == n_dets
    assert len(torch_labels) == n_dets

    # First sample of the HF split the reference sample belongs to
    hf_dataset = converter_torch_hf.convert()
    to_hf_split = {"train": Split.TRAIN, "val": Split.VALIDATION, "test": Split.TEST}
    hf_split = to_hf_split[get_split(reference)]
    hf_objects = hf_dataset[hf_split][0]["objects"]

    # The HF sample must hold one entry per detection as well
    assert len(hf_objects["bbox"]) == n_dets
    assert len(hf_objects["category_id"]) == n_dets

    width = reference.metadata.width
    height = reference.metadata.height
    # Coordinates 0 and 2 are divided by the width, 1 and 3 by the height
    scales = (width, height, width, height)

    for det_idx, det in enumerate(reference_dets):
        relative_box = det.bounding_box
        absolute_box = torch_boxes[det_idx].tolist()

        # Each rescaled torch coordinate must match FiftyOne within tolerance
        for axis, scale in enumerate(scales):
            assert abs(relative_box[axis] - absolute_box[axis] / scale) < 0.01

        # Category ids must agree in both converted datasets
        expected_id = label_to_id[det.label]
        assert (
            torch_labels[det_idx] == expected_id
        ), f"Mismatched category for {det.label}"
        assert hf_objects["category_id"][det_idx] == expected_id
Test that detections are preserved when converting between dataset formats.