tests.workflow_object_detection_hf_test

  1import logging
  2import os
  3import shutil
  4
  5import fiftyone as fo
  6import pytest
  7from fiftyone.utils.huggingface import load_from_hub
  8
  9import config.config
 10from main import workflow_auto_labeling_hf
 11from utils.data_loader import FiftyOneTorchDatasetCOCO, TorchToHFDatasetCOCO
 12
 13
@pytest.fixture(autouse=True)
def setup_logging():
    """Autouse fixture: apply the project's logging configuration before each test."""
    configure_logging()
 17
 18
 19@pytest.fixture(autouse=True)
 20def deactivate_hf_sync():
 21    config.config.HF_DO_UPLOAD = False
 22
 23
 24@pytest.fixture(autouse=True)
 25def deactivate_wandb_sync():
 26    config.config.WANDB_ACTIVE = False
 27
 28
 29import random
 30
 31from config.config import ACCEPTED_SPLITS
 32from utils.dataset_loader import _post_process_dataset
 33from utils.logging import configure_logging
 34
 35
 36@pytest.fixture
 37def dataset_v51():
 38    """Fixture to load a FiftyOne dataset from the hub."""
 39    dataset_name_hub = "Voxel51/fisheye8k"
 40    dataset_name = "fisheye8k_v51_hf_od_test"
 41    try:
 42        dataset = load_from_hub(
 43            repo_id=dataset_name_hub, max_samples=50, name=dataset_name
 44        )
 45        dataset = _post_process_dataset(dataset)
 46        for sample in dataset.iter_samples(progress=True, autosave=True):
 47            split = random.choice(ACCEPTED_SPLITS)
 48            sample.tags = [split]
 49    except:
 50        dataset = fo.load_dataset(dataset_name)
 51
 52    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
 53    return dataset
 54
 55
 56@pytest.fixture
 57def dataset_hf(dataset_v51):
 58    """Converts a FiftyOne dataset to a HuggingFace compatible dataset format."""
 59
 60    pytorch_dataset = FiftyOneTorchDatasetCOCO(dataset_v51)
 61    pt_to_hf_converter = TorchToHFDatasetCOCO(pytorch_dataset)
 62    hf_dataset = pt_to_hf_converter.convert()
 63
 64    return hf_dataset
 65
 66
 67@pytest.mark.parametrize("mode", ["train", "inference", "inference_hf"])
 68def test_hf_object_detection(dataset_v51, dataset_hf, mode):
 69    """Test object detection by running workflow_auto_labeling_hf with a HuggingFace model across different modes (train/eval/inference)."""
 70    try:
 71        MODEL_NAME = "microsoft/conditional-detr-resnet-50"
 72
 73        if mode == "inference_hf":
 74            selected_mode = ["inference"]
 75            # Delete the whole folder to force download from HF
 76            folder = f"./output/models/object_detection_hf/fisheye8k_v51_hf_od_test/{MODEL_NAME.replace("-","_")}"
 77            if os.path.exists(folder):
 78                shutil.rmtree(folder)
 79                print(f"Deleted folder: {folder}")
 80            else:
 81                print(f"Folder does not exist: {folder}")
 82
 83        else:
 84            selected_mode = [mode]
 85
 86        print(f"Test for mode {mode}")
 87
 88        run_config = {
 89            "mode": selected_mode,
 90            "model_name": MODEL_NAME,
 91            "v51_dataset_name": dataset_v51.name,
 92            "epochs": 1,
 93            "early_stop_patience": 1,
 94            "early_stop_threshold": 0,
 95            "learning_rate": 5e-05,
 96            "weight_decay": 0.0001,
 97            "max_grad_norm": 0.01,
 98            "batch_size": 1,
 99            "image_size": [16, 16],
100            "n_worker_dataloader": 1,
101            "inference_settings": {
102                "do_eval": True,
103                "inference_on_test": True,
104                "model_hf": None,  # None (automatic selection) or Hugging Face ID
105                "detection_threshold": 0.2,
106            },
107        }
108
109        # Delete existing field
110        try:
111            dataset_v51.delete_sample_field(
112                "pred_od_microsoft_conditional_detr_resnet_50_fisheye8k_v51_hf_od_test"
113            )
114        except Exception as e:
115            print(f"Field was not deleted: {e}")
116
117        workflow_auto_labeling_hf(
118            dataset_v51, dataset_hf, run_config, wandb_activate=False
119        )
120
121        if mode != "train":
122            # Check results
123            # Get fields with predictions
124            predictions_fields = []
125            fields = dataset_v51.get_field_schema()
126            for field in fields:
127                if "pred_" in field:
128                    print(f"Field with detections: {field}")
129                    predictions_fields.append(field)
130
131            assert len(predictions_fields) > 0, "No fields with predictions found"
132            assert (
133                len(predictions_fields) == 1
134            ), f"More than one detection field found: {predictions_fields}"
135
136            # Only test requested samples
137            if run_config["inference_settings"]["inference_on_test"] is True:
138                dataset_view = dataset_v51.match_tags(["test", "val"])
139            else:
140                dataset_view = dataset_v51
141
142            found_detections = 0
143            for sample in dataset_view:
144                for field in predictions_fields:
145                    if sample[field]:
146                        found_detections += len(sample[field].detections)
147
148            assert found_detections > 0, "No detections found"
149
150    except Exception as e:
151        pytest.fail(f"Mode {mode} failed: {e}.")
@pytest.fixture(autouse=True)
def setup_logging():
15@pytest.fixture(autouse=True)
16def setup_logging():
17    configure_logging()
@pytest.fixture(autouse=True)
def deactivate_hf_sync():
20@pytest.fixture(autouse=True)
21def deactivate_hf_sync():
22    config.config.HF_DO_UPLOAD = False
@pytest.fixture(autouse=True)
def deactivate_wandb_sync():
25@pytest.fixture(autouse=True)
26def deactivate_wandb_sync():
27    config.config.WANDB_ACTIVE = False
@pytest.fixture
def dataset_v51():
37@pytest.fixture
38def dataset_v51():
39    """Fixture to load a FiftyOne dataset from the hub."""
40    dataset_name_hub = "Voxel51/fisheye8k"
41    dataset_name = "fisheye8k_v51_hf_od_test"
42    try:
43        dataset = load_from_hub(
44            repo_id=dataset_name_hub, max_samples=50, name=dataset_name
45        )
46        dataset = _post_process_dataset(dataset)
47        for sample in dataset.iter_samples(progress=True, autosave=True):
48            split = random.choice(ACCEPTED_SPLITS)
49            sample.tags = [split]
50    except:
51        dataset = fo.load_dataset(dataset_name)
52
53    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
54    return dataset

Fixture to load a FiftyOne dataset from the hub.

@pytest.fixture
def dataset_hf(dataset_v51):
57@pytest.fixture
58def dataset_hf(dataset_v51):
59    """Converts a FiftyOne dataset to a HuggingFace compatible dataset format."""
60
61    pytorch_dataset = FiftyOneTorchDatasetCOCO(dataset_v51)
62    pt_to_hf_converter = TorchToHFDatasetCOCO(pytorch_dataset)
63    hf_dataset = pt_to_hf_converter.convert()
64
65    return hf_dataset

Converts a FiftyOne dataset to a HuggingFace compatible dataset format.

@pytest.mark.parametrize('mode', ['train', 'inference', 'inference_hf'])
def test_hf_object_detection(dataset_v51, dataset_hf, mode):
 68@pytest.mark.parametrize("mode", ["train", "inference", "inference_hf"])
 69def test_hf_object_detection(dataset_v51, dataset_hf, mode):
 70    """Test object detection by running workflow_auto_labeling_hf with a HuggingFace model across different modes (train/eval/inference)."""
 71    try:
 72        MODEL_NAME = "microsoft/conditional-detr-resnet-50"
 73
 74        if mode == "inference_hf":
 75            selected_mode = ["inference"]
 76            # Delete the whole folder to force download from HF
 77            folder = f"./output/models/object_detection_hf/fisheye8k_v51_hf_od_test/{MODEL_NAME.replace("-","_")}"
 78            if os.path.exists(folder):
 79                shutil.rmtree(folder)
 80                print(f"Deleted folder: {folder}")
 81            else:
 82                print(f"Folder does not exist: {folder}")
 83
 84        else:
 85            selected_mode = [mode]
 86
 87        print(f"Test for mode {mode}")
 88
 89        run_config = {
 90            "mode": selected_mode,
 91            "model_name": MODEL_NAME,
 92            "v51_dataset_name": dataset_v51.name,
 93            "epochs": 1,
 94            "early_stop_patience": 1,
 95            "early_stop_threshold": 0,
 96            "learning_rate": 5e-05,
 97            "weight_decay": 0.0001,
 98            "max_grad_norm": 0.01,
 99            "batch_size": 1,
100            "image_size": [16, 16],
101            "n_worker_dataloader": 1,
102            "inference_settings": {
103                "do_eval": True,
104                "inference_on_test": True,
105                "model_hf": None,  # None (automatic selection) or Hugging Face ID
106                "detection_threshold": 0.2,
107            },
108        }
109
110        # Delete existing field
111        try:
112            dataset_v51.delete_sample_field(
113                "pred_od_microsoft_conditional_detr_resnet_50_fisheye8k_v51_hf_od_test"
114            )
115        except Exception as e:
116            print(f"Field was not deleted: {e}")
117
118        workflow_auto_labeling_hf(
119            dataset_v51, dataset_hf, run_config, wandb_activate=False
120        )
121
122        if mode != "train":
123            # Check results
124            # Get fields with predictions
125            predictions_fields = []
126            fields = dataset_v51.get_field_schema()
127            for field in fields:
128                if "pred_" in field:
129                    print(f"Field with detections: {field}")
130                    predictions_fields.append(field)
131
132            assert len(predictions_fields) > 0, "No fields with predictions found"
133            assert (
134                len(predictions_fields) == 1
135            ), f"More than one detection field found: {predictions_fields}"
136
137            # Only test requested samples
138            if run_config["inference_settings"]["inference_on_test"] is True:
139                dataset_view = dataset_v51.match_tags(["test", "val"])
140            else:
141                dataset_view = dataset_v51
142
143            found_detections = 0
144            for sample in dataset_view:
145                for field in predictions_fields:
146                    if sample[field]:
147                        found_detections += len(sample[field].detections)
148
149            assert found_detections > 0, "No detections found"
150
151    except Exception as e:
152        pytest.fail(f"Mode {mode} failed: {e}.")

Test object detection by running workflow_auto_labeling_hf with a HuggingFace model across different modes (train/eval/inference).