config.config
This module is the central, user-editable configuration of the Mcity Data Engine: it selects the active workflow(s) and dataset, and defines the parameters of every supported workflow.

```python
import os
import psutil

#: Select workflow list from the 'WORKFLOWS = {...}' dictionary
SELECTED_WORKFLOW = ["data_ingest"]  # Choose from WORKFLOWS keys

#: Select dataset from config/datasets.yaml
SELECTED_DATASET = {
    "name": "fisheye8k",
    "n_samples": None,  # None (full dataset) or int (subset of the dataset)
    "custom_view": None,  # None (full dataset) or a view function from utils/custom_view
}

#: Workflows and associated parameters
WORKFLOWS = {
    "aws_download": {
        "mcity": {
            "bucket": "mcity-data-engine",
            "prefix": "",
            "download_path": "output/datasets/annarbor_rolling",
            "test_run": True,
            "selected_dataset_overwrite": True,
        }
    },
    "embedding_selection": {
        "mode": "compute",  # "compute" or "load"
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
        # Select from V51 "Embeddings" models: https://docs.voxel51.com/model_zoo/models.html
        "embedding_models": [
            "clip-vit-base32-torch",
            # "open-clip-torch",
            # "dinov2-vits14-torch",
            # "dinov2-vits14-reg-torch",
            # "mobilenet-v2-imagenet-torch",
            # "resnet152-imagenet-torch",
            # "vgg19-imagenet-torch",
            # "classification-transformer-torch",
            # "detection-transformer-torch",
            # "zero-shot-detection-transformer-torch",
            # "zero-shot-classification-transformer-torch",
        ],
    },
    "anomaly_detection": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "epochs": 12,
        "early_stop_patience": 5,
        # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/models/image/index.html
        "anomalib_image_models": {
            "Padim": {},
            # "EfficientAd": {},
            # "Draem": {},
            # "Cfa": {},
        },
        # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/metrics/index.html.
        # Focus on standard metrics; computing others can be expensive.
        "anomalib_eval_metrics": [
            "AUPR",
            "AUROC",
            "F1Max",
        ],
        "data_preparation": {"fisheye8k": {"location": "cam1", "rare_class": "Truck"}},
    },
    "auto_labeling": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "model_source": [
            # "hf_models_objectdetection",
            "ultralytics",
            # "custom_codetr",
        ],
        "n_worker_dataloader": 8,
        "epochs": 1,
        "early_stop_patience": 0,
        "early_stop_threshold": 0,
        "learning_rate": 5e-05,
        "weight_decay": 0.0001,
        "max_grad_norm": 0.01,
        "inference_settings": {
            "do_eval": True,
            "inference_on_test": True,
            # None (automatic selection) or overwrite with a Hugging Face ID.
            # Assumes the same model as selected below.
            "model_hf": None,
            "detection_threshold": 0.2,
        },
        # HF leaderboard: https://huggingface.co/spaces/hf-vision/object_detection_leaderboard
        "hf_models_objectdetection": {
            # "microsoft/conditional-detr-resnet-50": {"batch_size": 4},
            # "Omnifact/conditional-detr-resnet-101-dc5": {"batch_size": 1},
            # "facebook/detr-resnet-50": {"batch_size": 1},
            # "facebook/detr-resnet-50-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/detr-resnet-101": {"batch_size": 4, "image_size": [960, 960]},
            # "facebook/detr-resnet-101-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/deformable-detr-detic": {"batch_size": 4, "image_size": [960, 960]},
            # "facebook/deformable-detr-box-supervised": {"batch_size": 1, "image_size": [960, 960]},
            # "SenseTime/deformable-detr": {"batch_size": 4, "image_size": [960, 960]},
            # "SenseTime/deformable-detr-with-box-refine": {"batch_size": 1, "image_size": [960, 960]},
            # "jozhang97/deta-swin-large": {"batch_size": 1, "image_size": [960, 960]},
            # "jozhang97/deta-swin-large-o365": {"batch_size": 4, "image_size": [960, 960]},
            # "hustvl/yolos-base": {"batch_size": 4},
            "IDEA-Research/dab-detr-resnet-50": {
                "batch_size": 4,
                "image_size": [960, 960],
            },
            # "PekingU/rtdetr_v2_r18vd": {"batch_size": 4, "image_size": [960, 960]},
        },
        "custom_codetr": {
            "export_dataset_root": "output/datasets/codetr_data/",
            "configs": [
                "projects/configs/co_deformable_detr/co_deformable_detr_r50_1x_coco.py",
                "projects/configs/co_dino_vit/co_dino_5scale_vit_large_coco.py",
            ],
            "n_gpus": "1",
            "container_tool": "docker",
        },
        "ultralytics": {
            "export_dataset_root": "output/datasets/ultralytics_data/",
            "multi_scale": False,
            "cos_lr": True,
            "models": {  # Pick from https://docs.ultralytics.com/models/
                # "yolo11n": {"batch_size": 8, "img_size": 1280},
                # "yolo11x": {"batch_size": 1, "img_size": 960},
                "yolo12n": {"batch_size": 8, "img_size": 1280},
                # "yolo12x": {"batch_size": 1, "img_size": 960},
            },
        },
    },
    "auto_labeling_zero_shot": {
        "n_post_processing_worker_per_inference_worker": 5,
        "n_worker_dataloader": 3,
        "prefetch_factor_dataloader": 2,
        "hf_models_zeroshot_objectdetection": {
            # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=omlab%2Fomdet
            "omlab/omdet-turbo-swin-tiny-hf": {
                "batch_size": 1,
                "n_dataset_chunks": 1,  # Number of chunks to split the dataset into for parallel processing
            },
            # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=IDEA-Research%2Fgrounding
            "IDEA-Research/grounding-dino-tiny": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlvit
            "google/owlvit-large-patch14": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlv2
            "google/owlv2-base-patch16-finetuned": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-large-patch14-ensemble": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
        },
        "detection_threshold": 0.2,
        "object_classes": [
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "vehicle",
            "car",
            "bus",
            "truck",
            "taxi",
            "van",
            "pickup truck",
            "trailer",
            "emergency vehicle",
            "delivery driver",
        ],
    },
    "auto_label_mask": {
        "semantic_segmentation": {
            "sam2": {
                "prompt_field": None,
                "models": [
                    "segment-anything-2-hiera-tiny-image-torch",
                    "segment-anything-2-hiera-small-image-torch",
                    "segment-anything-2-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-tiny-image-torch",
                    "segment-anything-2.1-hiera-small-image-torch",
                    "segment-anything-2.1-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-large-image-torch",
                ],
            },
        },
        "depth_estimation": {
            "dpt": {
                "models": {
                    "Intel/dpt-swinv2-tiny-256",
                    "Intel/dpt-swinv2-large-384",
                    "Intel/dpt-beit-large-384",
                    "Intel/dpt-beit-large-512",
                    "Intel/dpt-large-ade",
                    "Intel/dpt-large",
                    "Intel/dpt-hybrid-midas",
                    "Intel/dpt-swinv2-base-384",
                    "Intel/dpt-beit-base-384",
                },
            },
            "depth_anything": {
                "models": {
                    "LiheYoung/depth-anything-base-hf",
                    "LiheYoung/depth-anything-large-hf",
                    "LiheYoung/depth-anything-small-hf",
                },
            },
            "depth_pro": {
                "models": {
                    "apple/DepthPro-hf",
                },
            },
            "glpn": {
                "models": {
                    "vinvino02/glpn-nyu",
                    "vinvino02/glpn-kitti",
                },
            },
            "zoe_depth": {
                "models": {
                    "Intel/zoedepth-nyu-kitti",
                    "Intel/zoedepth-nyu",
                    "Intel/zoedepth-kitti",
                },
            },
        },
    },
    "ensemble_selection": {
        "field_includes": "pred_zsod_",  # V51 field used for detections; "pred_zsod_" is the default for zero-shot object detection models
        "agreement_threshold": 3,  # Number of models that must agree on a detection
        "iou_threshold": 0.5,  # IoU threshold above which bboxes are considered overlapping
        "max_bbox_size": 0.01,  # Value in [0, 1] for the max size of considered bboxes
        # Classes to consider; must be a subset of the classes available in the
        # detections. Example for Vulnerable Road Users.
        "positive_classes": [
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "delivery driver",
        ],
    },
    "class_mapping": {
        # Source and target dataset names come from config/datasets.yaml
        "dataset_source": "fisheye8k",
        "dataset_target": "mcity_fisheye_2000",
        # True: change detection labels in the dataset.
        # False: only add tags, without changing labels.
        "change_labels": False,
        # Choose any number of zero-shot classification models below; to exclude
        # a model from class mapping, comment it out.
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForZeroShotImageClassification
        "hf_models_zeroshot_classification": [
            "Salesforce/blip2-itm-vit-g",
            "openai/clip-vit-large-patch14",
            "google/siglip-so400m-patch14-384",
            # "google/siglip2-base-patch16-224",
            "kakaobrain/align-base",
            "BAAI/AltCLIP",
            "CIDAS/clipseg-rd64-refined",
        ],
        "thresholds": {"confidence": 0.2},
        "candidate_labels": {
            # Target class (generalized): source classes (specific categories)
            "Car": ["car", "van", "pickup"],
            "Truck": ["truck", "pickup"],
            # One-to-one mapping
            "Bike": ["motorbike/cycler"],
            # Additional class mappings can be added here
        },
    },
    "data_ingest": {
        "dataset_name": "custom_data",
        "annotation_format": "auto",  # Options: "auto", "coco", "voc", "yolo", "image_only", "video"
        "dataset_dir": "/home/dataengine/Downloads/vid",
        "split_percentages": [0.7, 0.15, 0.15],  # Optional train/val/test split
        "fps": 2,  # Frames per second when converting a video dataset to a FiftyOne image dataset
    },
}

"""Global settings"""
#: Non-persistent datasets are deleted from the database each time the database is shut down
PERSISTENT = True
#: Accepted splits for data processing
ACCEPTED_SPLITS = ["train", "val", "test"]
cpu_count = len(psutil.Process().cpu_affinity())
#: Max. number of CPU workers
NUM_WORKERS_MAX = 32
NUM_WORKERS = NUM_WORKERS_MAX if cpu_count > NUM_WORKERS_MAX else cpu_count
#: Seed for reproducibility
GLOBAL_SEED = 0

"""Hugging Face Config"""
#: Hugging Face user or organization name
HF_ROOT = "mcity-data-engine"  # https://huggingface.co/mcity-data-engine
#: Determines if model weights should be uploaded to Hugging Face
HF_DO_UPLOAD = False

"""Weights and Biases Config"""
#: Determines if tracking with Weights and Biases is activated
WANDB_ACTIVE = True

"""Voxel51 Config"""
#: Address for Voxel51 connection
V51_ADDRESS = "localhost"
#: Port for Voxel51 connection
V51_PORT = 5151
#: Remote app sessions will listen to any connection to their ports
V51_REMOTE = True
```
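Per the comments on `SELECTED_DATASET`, `n_samples` accepts `None` for the full dataset or an `int` for a subset. A quick smoke-test run on a subset of fisheye8k would therefore look like this (the value 500 is illustrative):

```python
SELECTED_DATASET = {
    "name": "fisheye8k",
    "n_samples": 500,  # int subset for a quick test run instead of None (full dataset)
    "custom_view": None,
}
```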
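The `ensemble_selection` comments describe the voting mechanics: a detection counts as agreed upon when at least `agreement_threshold` models produce a box of a positive class that overlaps it with IoU at or above `iou_threshold`, and boxes larger than `max_bbox_size` are ignored. A minimal sketch of that rule, assuming relative `(x1, y1, x2, y2)` box coordinates and an area interpretation of "size" (both assumptions; the module does not pin down the geometry):

```python
# Sketch of the agreement rule; the box format and the area reading of
# "max_bbox_size" are assumptions, not taken from config.config.
def iou(a, b):
    ix = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    iy = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = ix * iy
    union = ((a[2] - a[0]) * (a[3] - a[1])
             + (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union if union > 0 else 0.0


def is_agreed(box, boxes_per_model, agreement_threshold=3, iou_threshold=0.5):
    # Count how many models produced at least one box overlapping `box`.
    votes = sum(
        any(iou(box, other) >= iou_threshold for other in boxes)
        for boxes in boxes_per_model
    )
    return votes >= agreement_threshold
```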
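Finally, for the `data_ingest` workflow, `split_percentages` is an optional train/val/test split, one fraction per entry in `ACCEPTED_SPLITS`. A sanity check like the following can catch typos early; the check itself is a suggestion, not something the module performs:

```python
import math

from config.config import ACCEPTED_SPLITS, WORKFLOWS

splits = WORKFLOWS["data_ingest"].get("split_percentages")
if splits is not None:
    assert len(splits) == len(ACCEPTED_SPLITS), "expected one fraction per split"
    assert math.isclose(sum(splits), 1.0, abs_tol=1e-6), "fractions should sum to 1.0"
```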