tests.workflow_embedding_selection_test

 1import os
 2
 3import fiftyone as fo
 4import pytest
 5from fiftyone import ViewField as F
 6from fiftyone.utils.huggingface import load_from_hub
 7
 8import config.config
 9from main import workflow_embedding_selection
10from utils.dataset_loader import load_dataset_info
11from utils.logging import configure_logging
12from workflows.embedding_selection import BRAIN_TAXONOMY
13
14
@pytest.fixture(autouse=True)
def deactivate_hf_sync(monkeypatch):
    """Disable Hugging Face uploads for the duration of each test.

    Sets ``config.config.HF_DO_UPLOAD`` to ``False`` through ``monkeypatch`` so
    that the previous value is put back automatically when the test finishes,
    instead of leaking the override into other tests in the same process.
    """
    # raising=False keeps the original behaviour of creating the attribute
    # when the config module does not define it yet.
    monkeypatch.setattr(config.config, "HF_DO_UPLOAD", False, raising=False)
18
19
@pytest.fixture(autouse=True)
def setup_logging():
    """Apply the project's logging configuration before every test."""
    configure_logging()
23
24
# Upper bound on the number of samples requested from the hub for the test dataset.
max_samples = 100


@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub.

    Requests at most ``max_samples`` samples of ``Voxel51/fisheye8k`` under the
    local name ``fisheye8k_v51_brain_test``. If the hub load raises, the
    fixture falls back to loading a dataset with that local name.

    Returns:
        The loaded FiftyOne dataset.
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_v51_brain_test"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
    except Exception:
        # NOTE(review): presumably this triggers when the dataset name already
        # exists from a previous run - confirm which exception the hub loader
        # raises and narrow further. `Exception` (instead of a bare `except:`)
        # lets KeyboardInterrupt/SystemExit propagate.
        dataset = fo.load_dataset(dataset_name)
    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
    return dataset
41
42
def _remove_local_embeddings(folder, name_fragment):
    """Delete files in *folder* whose filename contains *name_fragment*.

    A missing folder means there is nothing to delete and is not an error.
    Individual deletion failures are reported and skipped (best effort).
    """
    try:
        filenames = os.listdir(folder)
    except FileNotFoundError:
        print(f"Nothing to delete, folder does not exist: {folder}")
        return

    for filename in filenames:
        if name_fragment not in filename:
            continue
        file_path = os.path.join(folder, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except OSError as e:
            print(f"Error deleting {file_path}: {e}")


@pytest.mark.parametrize("mode", ["compute", "load", "load_hf"])
def test_embedding_selection(dataset_v51, mode):
    """Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.

    Modes:
        compute: passed to the workflow unchanged.
        load: passed to the workflow unchanged.
        load_hf: local embedding files for the model are deleted first and the
            workflow is then run in "load" mode (presumably so the embeddings
            have to come from Hugging Face - verify against the workflow).

    The test passes when at least one sample carries one of the ``value_*``
    entries of ``BRAIN_TAXONOMY`` in the taxonomy's result field.
    """
    MODEL_NAME = "mobilenet-v2-imagenet-torch"

    selected_mode = mode
    if mode == "load_hf":
        _remove_local_embeddings(
            "./output/embeddings/fisheye8k_v51_brain_test/",
            MODEL_NAME.replace("-", "_"),
        )
        selected_mode = "load"

    dataset_info = load_dataset_info("fisheye8k")  # Use loader for actual dataset
    # Update with test name for local tests where both exist
    dataset_info["name"] = "fisheye8k_v51_brain_test"

    # Named `workflow_config` so the imported `config` module is not shadowed.
    workflow_config = {
        "mode": selected_mode,
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
    }

    wandb_activate = False

    workflow_embedding_selection(
        dataset_v51, dataset_info, MODEL_NAME, workflow_config, wandb_activate
    )

    # Check number of selected samples
    results_field = BRAIN_TAXONOMY["field"]
    n_samples_selected = 0
    for key, value in BRAIN_TAXONOMY.items():
        if "value_" not in key:
            continue
        n_samples = len(dataset_v51.match(F(results_field) == value))
        print(f"Found {n_samples} samples for {results_field}/{value}")
        n_samples_selected += n_samples

    # Fail if no samples were selected
    assert n_samples_selected > 0, "No samples were selected"
@pytest.fixture(autouse=True)
def deactivate_hf_sync(monkeypatch):
    """Disable Hugging Face uploads for the duration of each test.

    Sets ``config.config.HF_DO_UPLOAD`` to ``False`` through ``monkeypatch`` so
    that the previous value is put back automatically when the test finishes,
    instead of leaking the override into other tests in the same process.
    """
    # raising=False keeps the original behaviour of creating the attribute
    # when the config module does not define it yet.
    monkeypatch.setattr(config.config, "HF_DO_UPLOAD", False, raising=False)
@pytest.fixture(autouse=True)
def setup_logging():
    """Apply the project's logging configuration before every test."""
    configure_logging()
# Upper bound on the number of samples requested from the hub for the test dataset.
max_samples = 100


@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub.

    Requests at most ``max_samples`` samples of ``Voxel51/fisheye8k`` under the
    local name ``fisheye8k_v51_brain_test``. If the hub load raises, the
    fixture falls back to loading a dataset with that local name.

    Returns:
        The loaded FiftyOne dataset.
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_v51_brain_test"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
    except Exception:
        # NOTE(review): presumably this triggers when the dataset name already
        # exists from a previous run - confirm which exception the hub loader
        # raises and narrow further. `Exception` (instead of a bare `except:`)
        # lets KeyboardInterrupt/SystemExit propagate.
        dataset = fo.load_dataset(dataset_name)
    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
    return dataset

Fixture to load a FiftyOne dataset from the hub.

def _remove_local_embeddings(folder, name_fragment):
    """Delete files in *folder* whose filename contains *name_fragment*.

    A missing folder means there is nothing to delete and is not an error.
    Individual deletion failures are reported and skipped (best effort).
    """
    try:
        filenames = os.listdir(folder)
    except FileNotFoundError:
        print(f"Nothing to delete, folder does not exist: {folder}")
        return

    for filename in filenames:
        if name_fragment not in filename:
            continue
        file_path = os.path.join(folder, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except OSError as e:
            print(f"Error deleting {file_path}: {e}")


@pytest.mark.parametrize("mode", ["compute", "load", "load_hf"])
def test_embedding_selection(dataset_v51, mode):
    """Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.

    Modes:
        compute: passed to the workflow unchanged.
        load: passed to the workflow unchanged.
        load_hf: local embedding files for the model are deleted first and the
            workflow is then run in "load" mode (presumably so the embeddings
            have to come from Hugging Face - verify against the workflow).

    The test passes when at least one sample carries one of the ``value_*``
    entries of ``BRAIN_TAXONOMY`` in the taxonomy's result field.
    """
    MODEL_NAME = "mobilenet-v2-imagenet-torch"

    selected_mode = mode
    if mode == "load_hf":
        _remove_local_embeddings(
            "./output/embeddings/fisheye8k_v51_brain_test/",
            MODEL_NAME.replace("-", "_"),
        )
        selected_mode = "load"

    dataset_info = load_dataset_info("fisheye8k")  # Use loader for actual dataset
    # Update with test name for local tests where both exist
    dataset_info["name"] = "fisheye8k_v51_brain_test"

    # Named `workflow_config` so the imported `config` module is not shadowed.
    workflow_config = {
        "mode": selected_mode,
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
    }

    wandb_activate = False

    workflow_embedding_selection(
        dataset_v51, dataset_info, MODEL_NAME, workflow_config, wandb_activate
    )

    # Check number of selected samples
    results_field = BRAIN_TAXONOMY["field"]
    n_samples_selected = 0
    for key, value in BRAIN_TAXONOMY.items():
        if "value_" not in key:
            continue
        n_samples = len(dataset_v51.match(F(results_field) == value))
        print(f"Found {n_samples} samples for {results_field}/{value}")
        n_samples_selected += n_samples

    # Fail if no samples were selected
    assert n_samples_selected > 0, "No samples were selected"

Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.