tests.workflow_data_ingest_test

 1import pytest
 2import fiftyone as fo
 3from huggingface_hub import snapshot_download
 4from config import config
 5from workflows.data_ingest import run_data_ingest
 6from utils.logging import configure_logging
 7import logging
 8import os
 9
10
@pytest.fixture(autouse=True)
def setup_logging():
    """Call the project's ``configure_logging()`` before each test in this module."""
    configure_logging()
14
15
@pytest.fixture(autouse=True)
def deactivate_wandb_sync(monkeypatch):
    """Set ``config.WANDB_ACTIVE`` to ``False`` for the duration of each test.

    The flag is patched through pytest's ``monkeypatch`` fixture so the
    previous value is restored at teardown instead of leaking into tests
    that run later in the same session.

    Args:
        monkeypatch: pytest's built-in fixture for reversible attribute patching.
    """
    # raising=False keeps the original behavior of creating the attribute
    # when ``config`` does not define it yet.
    monkeypatch.setattr(config, "WANDB_ACTIVE", False, raising=False)
19
20
@pytest.fixture
def test_video_ingest_dataset_dir():
    """Download the video-ingest test dataset repo from the Hugging Face Hub.

    Returns:
        The local directory reported by ``snapshot_download``; the test
        passes it to the data-ingest workflow as ``dataset_dir``.
    """
    download_options = {
        "repo_id": "Abeyankar/video-ingest-test",  # dataset repo with sample.mov
        "repo_type": "dataset",
        "local_dir": "/tmp/video-ingest-test",
        "local_dir_use_symlinks": False,
    }
    return snapshot_download(**download_options)
34
35
def test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
    """Run the data-ingest workflow on the downloaded video folder and validate it.

    Points ``config.WORKFLOWS["data_ingest"]`` at the fixture directory, runs
    ``run_data_ingest()``, loads the resulting FiftyOne dataset, and asserts
    that its name matches and that the total number of sample tags equals the
    number of samples (i.e. split tags were applied).

    Args:
        test_video_ingest_dataset_dir: Local path of the folder downloaded
            from the Hugging Face Hub by the fixture of the same name.
    """
    base_name = "video_ingest_test"
    # The output dataset is loaded under this name rather than ``base_name``.
    # NOTE(review): presumably the workflow appends a suffix - confirm in
    # workflows.data_ingest.
    dataset_name = "video_ingest_test1"

    config.WORKFLOWS["data_ingest"] = {
        "dataset_name": base_name,
        "dataset_dir": test_video_ingest_dataset_dir,
        "annotation_format": "auto",  # Will auto-detect 'video'
        "fps": 1,
        "split_percentages": [0.7, 0.15, 0.15],
    }

    try:
        # Run the workflow
        run_data_ingest()

        # Load and validate the output dataset
        dataset = fo.load_dataset(dataset_name)

        logging.info("Loaded dataset: %s", dataset.name)
        assert dataset is not None
        assert dataset.name == dataset_name

        tag_counts = dataset.count_sample_tags()
        total_tagged = sum(tag_counts.values())
        logging.info("Tag counts: %s", tag_counts)
        assert total_tagged == len(dataset), "Not all samples were tagged for split"
    finally:
        # Clean up even when the workflow or an assertion fails, so the
        # dataset is not left behind for the next run. The existence check
        # avoids masking the original error when no dataset was created.
        if fo.dataset_exists(dataset_name):
            fo.delete_dataset(dataset_name)
@pytest.fixture(autouse=True)
def setup_logging():
@pytest.fixture(autouse=True)
def setup_logging():
    """Call the project's ``configure_logging()`` before each test in this module."""
    configure_logging()
@pytest.fixture(autouse=True)
def deactivate_wandb_sync():
@pytest.fixture(autouse=True)
def deactivate_wandb_sync(monkeypatch):
    """Set ``config.WANDB_ACTIVE`` to ``False`` for the duration of each test.

    The flag is patched through pytest's ``monkeypatch`` fixture so the
    previous value is restored at teardown instead of leaking into tests
    that run later in the same session.

    Args:
        monkeypatch: pytest's built-in fixture for reversible attribute patching.
    """
    # raising=False keeps the original behavior of creating the attribute
    # when ``config`` does not define it yet.
    monkeypatch.setattr(config, "WANDB_ACTIVE", False, raising=False)
@pytest.fixture
def test_video_ingest_dataset_dir():
@pytest.fixture
def test_video_ingest_dataset_dir():
    """Download the video-ingest test dataset repo from the Hugging Face Hub.

    Returns:
        The local directory reported by ``snapshot_download``; the test
        passes it to the data-ingest workflow as ``dataset_dir``.
    """
    download_options = {
        "repo_id": "Abeyankar/video-ingest-test",  # dataset repo with sample.mov
        "repo_type": "dataset",
        "local_dir": "/tmp/video-ingest-test",
        "local_dir_use_symlinks": False,
    }
    return snapshot_download(**download_options)

Fixture that downloads a folder containing a .mov file from the Hugging Face Hub and returns its local path, which the test passes to the data_ingest workflow as `dataset_dir`.

def test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
def test_dataset_ingest_workflow_video(test_video_ingest_dataset_dir):
    """Run the data-ingest workflow on the downloaded video folder and validate it.

    Points ``config.WORKFLOWS["data_ingest"]`` at the fixture directory, runs
    ``run_data_ingest()``, loads the resulting FiftyOne dataset, and asserts
    that its name matches and that the total number of sample tags equals the
    number of samples (i.e. split tags were applied).

    Args:
        test_video_ingest_dataset_dir: Local path of the folder downloaded
            from the Hugging Face Hub by the fixture of the same name.
    """
    base_name = "video_ingest_test"
    # The output dataset is loaded under this name rather than ``base_name``.
    # NOTE(review): presumably the workflow appends a suffix - confirm in
    # workflows.data_ingest.
    dataset_name = "video_ingest_test1"

    config.WORKFLOWS["data_ingest"] = {
        "dataset_name": base_name,
        "dataset_dir": test_video_ingest_dataset_dir,
        "annotation_format": "auto",  # Will auto-detect 'video'
        "fps": 1,
        "split_percentages": [0.7, 0.15, 0.15],
    }

    try:
        # Run the workflow
        run_data_ingest()

        # Load and validate the output dataset
        dataset = fo.load_dataset(dataset_name)

        logging.info("Loaded dataset: %s", dataset.name)
        assert dataset is not None
        assert dataset.name == dataset_name

        tag_counts = dataset.count_sample_tags()
        total_tagged = sum(tag_counts.values())
        logging.info("Tag counts: %s", tag_counts)
        assert total_tagged == len(dataset), "Not all samples were tagged for split"
    finally:
        # Clean up even when the workflow or an assertion fails, so the
        # dataset is not left behind for the next run. The existence check
        # avoids masking the original error when no dataset was created.
        if fo.dataset_exists(dataset_name):
            fo.delete_dataset(dataset_name)

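Test the data_ingest workflow using a folder with a single .mov file downloaded from the Hugging Face Hub. Verifies that the output dataset can be loaded under the expected name and that the number of sample tags matches the number of samples, i.e. that split tags were applied. Frame extraction and the presence of ground_truth are not asserted directly.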