tests.data_loader_test
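Unit tests for the data loader utilities: converting a FiftyOne dataset into a COCO-style PyTorch dataset (FiftyOneTorchDatasetCOCO), converting onward to a Hugging Face dataset (TorchToHFDatasetCOCO), and iterating both with a torch DataLoader, including edge cases such as empty and unlabeled datasets.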

import random

import fiftyone as fo
import pytest
import torch
from datasets import Dataset, Split
from fiftyone.utils.huggingface import load_from_hub
from torch.utils.data import DataLoader

from config.config import ACCEPTED_SPLITS
from utils.data_loader import FiftyOneTorchDatasetCOCO, TorchToHFDatasetCOCO
from utils.dataset_loader import get_split

fisheye8k_gt_field = "detections"
max_samples = 50
batch_size = 4


@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub."""
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Ensure that all splits are represented (normally the Data Engine takes care of that)
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = [random.choice(ACCEPTED_SPLITS)]
    except Exception:
        # Dataset already exists locally; load it instead of re-downloading
        dataset = fo.load_dataset(dataset_name)
    return dataset


@pytest.fixture
def dataset_v51_no_splits_no_detections():
    """Fixture to load a FiftyOne dataset from the hub without tags or detections."""
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_pytest_raw"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
        # Remove all tags
        for sample in dataset.iter_samples(progress=True, autosave=True):
            sample.tags = []

        # Remove detection field
        dataset.delete_sample_field(fisheye8k_gt_field)
    except Exception:
        # Dataset already exists locally; load it instead of re-downloading
        dataset = fo.load_dataset(dataset_name)
    return dataset


def test_conversions_on_raw_dataset(dataset_v51_no_splits_no_detections):
    """Test that conversions work on a V51 dataset without labels or splits."""
    torch_dataset = FiftyOneTorchDatasetCOCO(
        dataset_v51_no_splits_no_detections, gt_field=None
    )
    hf_dataset_converter = TorchToHFDatasetCOCO(torch_dataset)
    hf_dataset = hf_dataset_converter.convert()

    assert torch_dataset is not None
    assert hf_dataset is not None


def test_dataset_v51(dataset_v51):
    """Test that the FiftyOne dataset fixture loads successfully."""
    assert dataset_v51 is not None


# Tests for torch dataset
@pytest.fixture
def torch_dataset(dataset_v51):
    """Fixture to create a FiftyOneTorchDatasetCOCO instance."""
    return FiftyOneTorchDatasetCOCO(dataset_v51, gt_field=fisheye8k_gt_field)


def test_torch_dataset_length(torch_dataset):
    """Test the length of the torch dataset."""
    assert len(torch_dataset) == max_samples


@pytest.mark.parametrize("index", [0, 1, 2])
def test_torch_dataset_getitem(torch_dataset, index):
    """Test getting an item from the torch dataset."""
    img, target = torch_dataset[index]
    assert isinstance(img, torch.Tensor)
    assert "bbox" in target
    assert "category_id" in target
    assert "image_id" in target
    assert "area" in target
    assert "iscrowd" in target


def test_torch_dataset_getitem_invalid_index(torch_dataset):
    """Test getting an item with an invalid index from the torch dataset."""
    test_index = max_samples * 10
    with pytest.raises(IndexError):
        torch_dataset[test_index]


def test_torch_dataset_getitems(torch_dataset):
    """Test getting multiple items from the torch dataset."""
    samples = torch_dataset.__getitems__([0, 1, 2])
    assert len(samples) == 3
    for img, target in samples:
        assert isinstance(img, torch.Tensor)
        assert "bbox" in target


def test_torch_dataset_getitems_invalid_indices(torch_dataset):
    """Test getting multiple items with invalid indices from the torch dataset."""
    test_index_1 = max_samples * 10
    test_index_2 = test_index_1 + 1
    with pytest.raises(IndexError):
        torch_dataset.__getitems__([test_index_1, test_index_2])


def test_torch_dataset_get_classes(torch_dataset):
    """Test getting classes from the torch dataset."""
    classes = torch_dataset.get_classes()
    assert isinstance(classes, list)


def test_torch_dataset_get_splits(torch_dataset):
    """Test getting splits from the torch dataset."""
    splits = torch_dataset.get_splits()
    # Test that the return type is a set
    assert isinstance(splits, set), "get_splits() should return a set"

    # Empty splits are allowed
    if not splits:
        return

    # If splits exist, they must be a subset of ACCEPTED_SPLITS
    assert splits.issubset(
        set(ACCEPTED_SPLITS)
    ), f"Invalid splits found: {splits}. All splits must be one of {ACCEPTED_SPLITS}"


# Tests for torch dataloader
@pytest.fixture
def dataloader(torch_dataset):
    """Fixture to create a DataLoader instance."""
    return DataLoader(
        torch_dataset,
        batch_size=batch_size,
        # Transpose a list of (img, target) pairs into (imgs, targets) tuples
        collate_fn=lambda batch: list(zip(*batch)),
        shuffle=True,
    )


def test_dataloader_length(dataloader, torch_dataset):
    """Test the length of the dataloader."""
    # Ceiling division: the last batch may be smaller than batch_size
    assert len(dataloader) == (len(torch_dataset) + batch_size - 1) // batch_size


def test_dataloader_batch(dataloader, torch_dataset):
    """Test getting a batch from the dataloader."""
    total_samples = len(torch_dataset)
    samples_processed = 0

    for batch in dataloader:
        imgs, targets = batch
        current_batch_size = len(imgs)

        # For the last batch, the size might be smaller
        if samples_processed + batch_size > total_samples:
            expected_size = total_samples - samples_processed
            assert (
                current_batch_size == expected_size
            ), f"Last batch size should be {expected_size} but got {current_batch_size}"
        else:
            assert (
                current_batch_size == batch_size
            ), f"Batch size should be {batch_size} but got {current_batch_size}"

        assert len(targets) == current_batch_size

        for img, target in zip(imgs, targets):
            assert isinstance(img, torch.Tensor)
            assert "bbox" in target
            assert "category_id" in target
            assert "image_id" in target
            assert "area" in target
            assert "iscrowd" in target

        samples_processed += current_batch_size

    # Verify that we processed all samples
    assert (
        samples_processed == total_samples
    ), f"Processed {samples_processed} samples but dataset has {total_samples}"


# Tests for HF dataset
@pytest.fixture
def converter_torch_hf(torch_dataset):
    """Fixture to create a TorchToHFDatasetCOCO instance."""
    return TorchToHFDatasetCOCO(torch_dataset)


def test_hf_dataset_conversion(converter_torch_hf):
    """Test converting the torch dataset to a HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    # Get splits from the dataset
    splits = set(hf_dataset.keys())

    # Empty splits are allowed
    if not splits:
        return

    ACCEPTED_SPLITS_HF = {Split.TRAIN, Split.TEST, Split.VALIDATION}

    # If splits exist, they must be a subset of ACCEPTED_SPLITS_HF
    assert splits.issubset(
        ACCEPTED_SPLITS_HF
    ), f"Invalid splits found: {splits}. All splits must be one of {ACCEPTED_SPLITS_HF}"

    # Only test the instance type for valid splits
    for split in splits:
        assert isinstance(
            hf_dataset[split], Dataset
        ), f"{split} split should be a Dataset"


def test_hf_dataset_sample(converter_torch_hf):
    """Test getting a sample from the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            sample = hf_dataset[split][0]
            assert "image_path" in sample
            assert "objects" in sample
            assert "split" in sample


def test_hf_dataset_dataloader(converter_torch_hf):
    """Test creating a DataLoader from the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            dataloader = DataLoader(
                hf_dataset[split],
                batch_size=batch_size,
                collate_fn=lambda batch: (
                    [item["image_path"] for item in batch],
                    [item["objects"] for item in batch],
                    [item["split"] for item in batch],
                ),
            )
            for batch in dataloader:
                images, targets, splits = batch
                for img, target, split in zip(images, targets, splits):
                    assert isinstance(img, str)
                    assert isinstance(target["bbox"], list)
                    assert isinstance(target["category_id"], list)
                    assert isinstance(split, str)


def test_hf_dataset_with_format(converter_torch_hf):
    """Test setting the format of the HF dataset."""
    hf_dataset = converter_torch_hf.convert()
    for split in ACCEPTED_SPLITS:
        if split in hf_dataset:
            hf_dataset[split] = hf_dataset[split].with_format("torch")
            sample = hf_dataset[split][0]
            assert isinstance(sample["image_path"], str)  # Includes filepath
            assert isinstance(sample["objects"]["bbox"], torch.Tensor)
            assert isinstance(sample["objects"]["category_id"], torch.Tensor)


# Tests for incomplete datasets
@pytest.fixture
def empty_dataset():
    """Fixture to create an empty FiftyOne dataset."""
    try:
        dataset = fo.Dataset(name="empty_dataset")
    except Exception:
        # Dataset with this name already exists; load it instead
        dataset = fo.load_dataset("empty_dataset")
    return dataset


@pytest.fixture
def no_annotations_dataset():
    """Fixture to create a FiftyOne dataset with no annotations."""
    try:
        dataset = fo.Dataset(name="no_annotations_dataset")
        dataset.add_sample(fo.Sample(filepath="image1.jpg"))
        dataset.add_sample(fo.Sample(filepath="image2.jpg"))
    except Exception:
        # Dataset with this name already exists; load it instead
        dataset = fo.load_dataset("no_annotations_dataset")

    return dataset


def test_empty_dataset(empty_dataset):
    """Test creating a torch dataset from an empty FiftyOne dataset."""
    dataset = FiftyOneTorchDatasetCOCO(empty_dataset)
    assert len(dataset) == 0


def test_no_annotations_dataset(no_annotations_dataset):
    """Test creating a torch dataset from a FiftyOne dataset with no annotations."""
    dataset = FiftyOneTorchDatasetCOCO(no_annotations_dataset)
    assert len(dataset) == 2


def test_detection_preservation(dataset_v51, torch_dataset, converter_torch_hf):
    """Test that detections are preserved when converting between dataset formats."""

    # Get a sample from the FiftyOne dataset
    v51_sample = dataset_v51.first()
    v51_detections = v51_sample[fisheye8k_gt_field].detections
    v51_det_count = len(v51_detections)

    # Get the corresponding torch sample
    torch_sample = torch_dataset[0]
    torch_bboxes = torch_sample[1]["bbox"]
    torch_categories = torch_sample[1]["category_id"]

    # Build the category mapping
    categories = dataset_v51.distinct(f"{fisheye8k_gt_field}.detections.label")
    category_map = {label: idx for idx, label in enumerate(categories)}

    # Verify that the torch detection count matches
    assert len(torch_bboxes) == v51_det_count
    assert len(torch_categories) == v51_det_count

    # Convert to a HF dataset and get the sample
    hf_dataset = converter_torch_hf.convert()
    split = get_split(v51_sample)
    split_mapping = {"train": Split.TRAIN, "val": Split.VALIDATION, "test": Split.TEST}
    hf_sample = hf_dataset[split_mapping[split]][0]

    # Verify that the HF detection count matches
    assert len(hf_sample["objects"]["bbox"]) == v51_det_count
    assert len(hf_sample["objects"]["category_id"]) == v51_det_count

    img_width = v51_sample.metadata.width
    img_height = v51_sample.metadata.height

    # Verify that detection properties match between V51 and torch
    for i, v51_det in enumerate(v51_detections):
        # Check the bounding box format conversion (V51 uses relative
        # coordinates; torch uses absolute pixel coordinates)
        v51_bbox = v51_det.bounding_box
        torch_bbox = torch_sample[1]["bbox"][i].tolist()

        # Verify the coordinates with tolerance
        assert (
            abs(v51_bbox[0] - torch_bbox[0] / img_width) < 0.01
        )  # width normalization
        assert (
            abs(v51_bbox[1] - torch_bbox[1] / img_height) < 0.01
        )  # height normalization
        assert abs(v51_bbox[2] - torch_bbox[2] / img_width) < 0.01
        assert abs(v51_bbox[3] - torch_bbox[3] / img_height) < 0.01

        # Verify the category mapping for all classes
        expected_category = category_map[v51_det.label]
        assert (
            torch_categories[i] == expected_category
        ), f"Mismatched category for {v51_det.label}"
        assert hf_sample["objects"]["category_id"][i] == expected_category