tests.workflow_embedding_selection_test
"""Tests for the embedding selection workflow."""

import os

import fiftyone as fo
import pytest
from fiftyone import ViewField as F
from fiftyone.utils.huggingface import load_from_hub

import config.config
from main import workflow_embedding_selection
from utils.dataset_loader import load_dataset_info
from utils.logging import configure_logging
from workflows.embedding_selection import BRAIN_TAXONOMY


@pytest.fixture(autouse=True)
def deactivate_hf_sync():
    """Set ``config.config.HF_DO_UPLOAD`` to ``False`` before every test.

    NOTE(review): presumably this keeps the tests from uploading to
    Hugging Face; the flag is not restored afterwards.
    """
    config.config.HF_DO_UPLOAD = False


@pytest.fixture(autouse=True)
def setup_logging():
    """Call ``configure_logging()`` before every test."""
    configure_logging()


# Number of samples requested from the hub when building the test dataset.
max_samples = 100


@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub.

    Falls back to ``fo.load_dataset`` with the test dataset name when the hub
    load raises (presumably because a dataset with that name already exists
    locally from a previous run).
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_v51_brain_test"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        dataset = fo.load_dataset(dataset_name)
    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
    return dataset


@pytest.mark.parametrize("mode", ["compute", "load", "load_hf"])
def test_embedding_selection(dataset_v51, mode):
    """Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.

    For ``mode == "load_hf"`` the locally stored embedding files of the model
    are deleted first and the workflow is then run in ``"load"`` mode
    (presumably forcing the embeddings to be fetched from Hugging Face).
    """

    MODEL_NAME = "mobilenet-v2-imagenet-torch"
    selected_mode = mode
    if mode == "load_hf":
        local_folder = "./output/embeddings/fisheye8k_v51_brain_test/"
        model_name_key = MODEL_NAME.replace("-", "_")
        # The folder may not exist yet (e.g. when only this case is run);
        # in that situation there is simply nothing to delete.
        if os.path.isdir(local_folder):
            for filename in os.listdir(local_folder):
                if model_name_key in filename:
                    file_path = os.path.join(local_folder, filename)
                    try:
                        os.remove(file_path)
                        print(f"Deleted: {file_path}")
                    except OSError as e:
                        print(f"Error deleting {file_path}: {e}")
        selected_mode = "load"

    dataset_info = load_dataset_info("fisheye8k")  # Use loader for actual dataset
    # Update with test name for local tests where both exist
    dataset_info["name"] = "fisheye8k_v51_brain_test"

    # Named ``workflow_config`` so the imported ``config`` package is not shadowed.
    workflow_config = {
        "mode": selected_mode,
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
    }

    wandb_activate = False

    workflow_embedding_selection(
        dataset_v51, dataset_info, MODEL_NAME, workflow_config, wandb_activate
    )

    # Check number of selected samples
    results_field = BRAIN_TAXONOMY["field"]
    n_samples_selected = 0
    for key, value in BRAIN_TAXONOMY.items():
        # Only the "value_*" entries hold result values to match against.
        if "value_" in key:
            view_result = dataset_v51.match(F(results_field) == value)
            n_samples = len(view_result)
            print(f"Found {n_samples} samples for {results_field}/{value}")
            n_samples_selected += n_samples

    # Fail if no samples were selected
    assert n_samples_selected != 0, "No samples were selected"
@pytest.fixture(autouse=True)
def
deactivate_hf_sync():
@pytest.fixture(autouse=True)
def
setup_logging():
max_samples =
100
@pytest.fixture
def
dataset_v51():
@pytest.fixture
def dataset_v51():
    """Fixture to load a FiftyOne dataset from the hub.

    Falls back to ``fo.load_dataset`` with the test dataset name when the hub
    load raises (presumably because a dataset with that name already exists
    locally from a previous run).
    """
    dataset_name_hub = "Voxel51/fisheye8k"
    dataset_name = "fisheye8k_v51_brain_test"
    try:
        dataset = load_from_hub(
            repo_id=dataset_name_hub, max_samples=max_samples, name=dataset_name
        )
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        dataset = fo.load_dataset(dataset_name)
    assert dataset is not None, "Failed to load or create the FiftyOne dataset"
    return dataset
Fixture to load a FiftyOne dataset from the hub.
@pytest.mark.parametrize('mode', ['compute', 'load', 'load_hf'])
def
test_embedding_selection(dataset_v51, mode):
@pytest.mark.parametrize("mode", ["compute", "load", "load_hf"])
def test_embedding_selection(dataset_v51, mode):
    """Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.

    For ``mode == "load_hf"`` the locally stored embedding files of the model
    are deleted first and the workflow is then run in ``"load"`` mode
    (presumably forcing the embeddings to be fetched from Hugging Face).
    """

    MODEL_NAME = "mobilenet-v2-imagenet-torch"
    selected_mode = mode
    if mode == "load_hf":
        local_folder = "./output/embeddings/fisheye8k_v51_brain_test/"
        model_name_key = MODEL_NAME.replace("-", "_")
        # The folder may not exist yet (e.g. when only this case is run);
        # in that situation there is simply nothing to delete.
        if os.path.isdir(local_folder):
            for filename in os.listdir(local_folder):
                if model_name_key in filename:
                    file_path = os.path.join(local_folder, filename)
                    try:
                        os.remove(file_path)
                        print(f"Deleted: {file_path}")
                    except OSError as e:
                        print(f"Error deleting {file_path}: {e}")
        selected_mode = "load"

    dataset_info = load_dataset_info("fisheye8k")  # Use loader for actual dataset
    # Update with test name for local tests where both exist
    dataset_info["name"] = "fisheye8k_v51_brain_test"

    # Named ``workflow_config`` so an imported ``config`` package is not shadowed.
    workflow_config = {
        "mode": selected_mode,
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
    }

    wandb_activate = False

    workflow_embedding_selection(
        dataset_v51, dataset_info, MODEL_NAME, workflow_config, wandb_activate
    )

    # Check number of selected samples
    results_field = BRAIN_TAXONOMY["field"]
    n_samples_selected = 0
    for key, value in BRAIN_TAXONOMY.items():
        # Only the "value_*" entries hold result values to match against.
        if "value_" in key:
            view_result = dataset_v51.match(F(results_field) == value)
            n_samples = len(view_result)
            print(f"Found {n_samples} samples for {results_field}/{value}")
            n_samples_selected += n_samples

    # Fail if no samples were selected
    assert n_samples_selected != 0, "No samples were selected"
Tests the embedding selection workflow on a given dataset with specified model and configuration parameters.