tests.workflow_data_ingest_test
import logging
import os

import pytest
import fiftyone as fo
from huggingface_hub import snapshot_download

from config import config
from workflows.data_ingest import run_data_ingest
from utils.logging import configure_logging


@pytest.fixture(autouse=True)
def setup_logging():
    """Configure project logging before every test in this module."""
    configure_logging()


@pytest.fixture(autouse=True)
def deactivate_wandb_sync():
    """Set ``config.WANDB_ACTIVE`` to False before every test in this module."""
    config.WANDB_ACTIVE = False


@pytest.fixture
def test_video_ingest_dataset_dir():
    """
    Fixture that downloads a folder with a .mov file from Hugging Face Hub
    and returns the local path so it can be passed into dataset_ingest.
    """
    local_dir = snapshot_download(
        repo_id="Abeyankar/video-ingest-test",  # Your dataset with sample.mov
        repo_type="dataset",
        local_dir="/tmp/video-ingest-test",
        local_dir_use_symlinks=False,
    )
    return local_dir


def test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
    """
    Run the data_ingest workflow on a single .mov file downloaded from HF Hub.

    Points ``config.WORKFLOWS["data_ingest"]`` at the downloaded directory,
    runs the workflow, then checks that the resulting dataset loads under the
    expected name and that the split-tag counts add up to the number of
    samples. The dataset is deleted afterwards even when an assertion fails.
    """
    base_name = "video_ingest_test"
    # Name under which the test expects the workflow to store the dataset.
    # NOTE(review): presumably the base name plus a suffix added by the
    # workflow -- confirm against run_data_ingest.
    dataset_name = "video_ingest_test1"

    config.WORKFLOWS["data_ingest"] = {
        "dataset_name": base_name,
        "dataset_dir": test_video_ingest_dataset_dir,
        "annotation_format": "auto",  # Will auto-detect 'video'
        "fps": 1,
        "split_percentages": [0.7, 0.15, 0.15],
    }

    try:
        # Run the workflow
        run_data_ingest()

        # Load and validate the output dataset
        dataset = fo.load_dataset(dataset_name)

        logging.info("Loaded dataset: %s", dataset.name)
        assert dataset is not None
        assert dataset.name == dataset_name

        tag_counts = dataset.count_sample_tags()
        total_tagged = sum(tag_counts.values())
        logging.info("Tag counts: %s", tag_counts)
        assert total_tagged == len(dataset), "Not all samples were tagged for split"
    finally:
        # Clean up in every case so a failed assertion does not leave the
        # dataset behind; skip the delete if the workflow never created it.
        if fo.dataset_exists(dataset_name):
            fo.delete_dataset(dataset_name)
@pytest.fixture(autouse=True)
def
setup_logging():
@pytest.fixture(autouse=True)
def
deactivate_wandb_sync():
@pytest.fixture
def
test_video_ingest_dataset_dir():
@pytest.fixture
def test_video_ingest_dataset_dir():
    """
    Download the sample video dataset from Hugging Face Hub.

    Fetches the folder containing a .mov file and hands back the value
    returned by ``snapshot_download`` (the local path), so the test can pass
    it into dataset_ingest.
    """
    return snapshot_download(
        repo_id="Abeyankar/video-ingest-test",  # Your dataset with sample.mov
        repo_type="dataset",
        local_dir="/tmp/video-ingest-test",
        local_dir_use_symlinks=False,
    )
Fixture that downloads a folder with a .mov file from Hugging Face Hub and returns the local path so it can be passed into dataset_ingest.
def
test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
def test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
    """
    Run the data_ingest workflow on a single .mov file downloaded from HF Hub.

    Points ``config.WORKFLOWS["data_ingest"]`` at the downloaded directory,
    runs the workflow, then checks that the resulting dataset loads under the
    expected name and that the split-tag counts add up to the number of
    samples. The dataset is deleted afterwards even when an assertion fails.
    """
    base_name = "video_ingest_test"
    # Name under which the test expects the workflow to store the dataset.
    # NOTE(review): presumably the base name plus a suffix added by the
    # workflow -- confirm against run_data_ingest.
    dataset_name = "video_ingest_test1"

    config.WORKFLOWS["data_ingest"] = {
        "dataset_name": base_name,
        "dataset_dir": test_video_ingest_dataset_dir,
        "annotation_format": "auto",  # Will auto-detect 'video'
        "fps": 1,
        "split_percentages": [0.7, 0.15, 0.15],
    }

    try:
        # Run the workflow
        run_data_ingest()

        # Load and validate the output dataset
        dataset = fo.load_dataset(dataset_name)

        logging.info("Loaded dataset: %s", dataset.name)
        assert dataset is not None
        assert dataset.name == dataset_name

        tag_counts = dataset.count_sample_tags()
        total_tagged = sum(tag_counts.values())
        logging.info("Tag counts: %s", tag_counts)
        assert total_tagged == len(dataset), "Not all samples were tagged for split"
    finally:
        # Clean up in every case so a failed assertion does not leave the
        # dataset behind; skip the delete if the workflow never created it.
        if fo.dataset_exists(dataset_name):
            fo.delete_dataset(dataset_name)
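Test the dataset_ingest workflow using a single .mov file downloaded from the Hugging Face Hub. Checks that the resulting dataset loads under the expected name and that the split-tag counts add up to the number of samples; frame extraction and the presence of ground_truth are not asserted directly.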