config.config
import os
import psutil

#: Select workflow list from 'WORKFLOWS = {...}' dictionary
SELECTED_WORKFLOW = ["embedding_selection"]  # Choose from WORKFLOWS keys

#: Select dataset from config/datasets.yaml
SELECTED_DATASET = {
    "name": "fisheye8k",
    "n_samples": None,  # 'None' (full dataset) or 'int' (subset of the dataset)
    "custom_view": None,  # 'None' (full dataset) or a function from utils/custom_view
}

#: Workflows and associated parameters
WORKFLOWS = {
    "aws_download": {
        "mcity": {
            "bucket": "mcity-data-engine",
            "prefix": "",
            "download_path": "output/datasets/annarbor_rolling",
            "test_run": True,
            "selected_dataset_overwrite": True,
        }
    },
    "embedding_selection": {
        "mode": "compute",  # "compute" or "load"
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
        "embedding_models": [  # Select from V51 "Embeddings" models: https://docs.voxel51.com/model_zoo/models.html
            "clip-vit-base32-torch",
            # "open-clip-torch",
            # "dinov2-vits14-torch",
            # "dinov2-vits14-reg-torch",
            # "mobilenet-v2-imagenet-torch",
            # "resnet152-imagenet-torch",
            # "vgg19-imagenet-torch",
            # "classification-transformer-torch",
            # "detection-transformer-torch",
            # "zero-shot-detection-transformer-torch",
            # "zero-shot-classification-transformer-torch",
        ],
    },
    "anomaly_detection": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "epochs": 12,
        "early_stop_patience": 5,
        "anomalib_image_models": {  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/models/image/index.html
            "Padim": {},
            # "EfficientAd": {},
            # "Draem": {},
            # "Cfa": {},
        },
        "anomalib_eval_metrics": [  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/metrics/index.html. Focus on standard metrics; computing others can be expensive
            "AUPR",
            "AUROC",
            "F1Max",
        ],
        "data_preparation": {"fisheye8k": {"location": "cam1", "rare_class": "Truck"}},
    },
    "auto_labeling": {
        "mode": ["train"],  # "train" and "inference" supported
        "model_source": [
            # "hf_models_objectdetection",
            "ultralytics",
            # "custom_codetr",
        ],
        "n_worker_dataloader": 8,
        "epochs": 1000,
        "early_stop_patience": 0,
        "early_stop_threshold": 0,
        "learning_rate": 5e-05,
        "weight_decay": 0.0001,
        "max_grad_norm": 0.01,
        "inference_settings": {
            "do_eval": True,
            "inference_on_test": True,
            "model_hf": None,  # None (automatic selection) or overwrite with a Hugging Face ID. Assumes the same model as selected below.
            "detection_threshold": 0.2,
        },
        "hf_models_objectdetection": {  # HF leaderboard: https://huggingface.co/spaces/hf-vision/object_detection_leaderboard
            # "microsoft/conditional-detr-resnet-50": {"batch_size": 4},
            # "Omnifact/conditional-detr-resnet-101-dc5": {"batch_size": 1},
            # "facebook/detr-resnet-50": {"batch_size": 1},
            # "facebook/detr-resnet-50-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/detr-resnet-101": {"batch_size": 4, "image_size": [960, 960]},
            # "facebook/detr-resnet-101-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/deformable-detr-detic": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "facebook/deformable-detr-box-supervised": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "SenseTime/deformable-detr": {"batch_size": 4, "image_size": [960, 960]},
            # "SenseTime/deformable-detr-with-box-refine": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large-o365": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "hustvl/yolos-base": {"batch_size": 4},
            "IDEA-Research/dab-detr-resnet-50": {
                "batch_size": 4,
                "image_size": [960, 960],
            },
            # "PekingU/rtdetr_v2_r18vd": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
        },
        "custom_codetr": {
            "export_dataset_root": "output/datasets/codetr_data/",
            "configs": [
                "projects/configs/co_deformable_detr/co_deformable_detr_r50_1x_coco.py",
                "projects/configs/co_dino_vit/co_dino_5scale_vit_large_coco.py",
            ],
            "n_gpus": "1",
            "container_tool": "docker",
        },
        "ultralytics": {
            "export_dataset_root": "output/datasets/ultralytics_data/",
            "multi_scale": False,
            "cos_lr": True,
            "models": {  # Pick from https://docs.ultralytics.com/models/
                # "yolo11n": {"batch_size": 8, "img_size": 1280},
                # "yolo11x": {"batch_size": 1, "img_size": 960},
                "yolo12n": {"batch_size": 8, "img_size": 1280},
                # "yolo12x": {"batch_size": 1, "img_size": 960},
            },
        },
    },
    "auto_labeling_zero_shot": {
        "n_post_processing_worker_per_inference_worker": 5,
        "n_worker_dataloader": 3,
        "prefetch_factor_dataloader": 2,
        "hf_models_zeroshot_objectdetection": {
            "omlab/omdet-turbo-swin-tiny-hf": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=omlab%2Fomdet
                "batch_size": 1,
                "n_dataset_chunks": 1,  # Number of chunks to split the dataset into for parallel processing
            },
            "IDEA-Research/grounding-dino-tiny": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=IDEA-Research%2Fgrounding
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlvit-large-patch14": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlvit
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-base-patch16-finetuned": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlv2
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-large-patch14-ensemble": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
        },
        "detection_threshold": 0.2,
        "object_classes": [
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "vehicle",
            "car",
            "bus",
            "truck",
            "taxi",
            "van",
            "pickup truck",
            "trailer",
            "emergency vehicle",
            "delivery driver",
        ],
    },
    "auto_label_mask": {
        "semantic_segmentation": {
            "sam2": {
                "prompt_field": None,
                "models": [
                    "segment-anything-2-hiera-tiny-image-torch",
                    "segment-anything-2-hiera-small-image-torch",
                    "segment-anything-2-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-tiny-image-torch",
                    "segment-anything-2.1-hiera-small-image-torch",
                    "segment-anything-2.1-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-large-image-torch",
                ],
            },
        },
        "depth_estimation": {
            "dpt": {
                "models": {
                    "Intel/dpt-swinv2-tiny-256",
                    "Intel/dpt-swinv2-large-384",
                    "Intel/dpt-beit-large-384",
                    "Intel/dpt-beit-large-512",
                    "Intel/dpt-large-ade",
                    "Intel/dpt-large",
                    "Intel/dpt-hybrid-midas",
                    "Intel/dpt-swinv2-base-384",
                    "Intel/dpt-beit-base-384",
                },
            },
            "depth_anything": {
                "models": {
                    "LiheYoung/depth-anything-base-hf",
                    "LiheYoung/depth-anything-large-hf",
                    "LiheYoung/depth-anything-small-hf",
                },
            },
            "depth_pro": {
                "models": {
                    "apple/DepthPro-hf",
                },
            },
            "glpn": {
                "models": {
                    "vinvino02/glpn-nyu",
                    "vinvino02/glpn-kitti",
                },
            },
            "zoe_depth": {
                "models": {
                    "Intel/zoedepth-nyu-kitti",
                    "Intel/zoedepth-nyu",
                    "Intel/zoedepth-kitti",
                },
            },
        },
    },
    "ensemble_selection": {
        "field_includes": "pred_zsod_",  # V51 field used for detections; "pred_zsod_" is the default for zero-shot object detection models
        "agreement_threshold": 3,  # Number of models that must agree on a detection
        "iou_threshold": 0.5,  # IoU threshold above which two bboxes are considered overlapping
        "max_bbox_size": 0.01,  # Value in [0, 1] for the max relative size of considered bboxes
        "positive_classes": [  # Classes to consider; must be a subset of the classes available in the detections. Example for Vulnerable Road Users.
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "delivery driver",
        ],
    },
    "class_mapping": {
        # Get the source and target dataset names from config/datasets.yaml
        "dataset_source": "fisheye8k",
        "dataset_target": "mcity_fisheye_2000",
        # Set to True to change detection labels in the dataset; set to False to only add tags without changing labels
        "change_labels": False,
        # Choose any number of models from 'hf_models_zeroshot_classification' below; to exclude a model from class mapping, comment it out.
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForZeroShotImageClassification
        "hf_models_zeroshot_classification": [
            "Salesforce/blip2-itm-vit-g",
            "openai/clip-vit-large-patch14",
            "google/siglip-so400m-patch14-384",
            # "google/siglip2-base-patch16-224",
            "kakaobrain/align-base",
            "BAAI/AltCLIP",
            "CIDAS/clipseg-rd64-refined",
        ],
        "thresholds": {"confidence": 0.2},
        "candidate_labels": {
            # Target class (generalized class): source classes (specific categories)
            "Car": ["car", "van", "pickup"],
            "Truck": ["truck", "pickup"],
            # One-to-one mapping
            "Bike": ["motorbike/cycler"],
            # Additional class mappings can be added here
        },
    },
}

"""Global settings"""
#: Non-persistent datasets are deleted from the database each time the database is shut down
PERSISTENT = True
#: Accepted splits for data processing
ACCEPTED_SPLITS = ["train", "val", "test"]
cpu_count = len(psutil.Process().cpu_affinity())
#: Max. number of CPU workers
NUM_WORKERS_MAX = 32
NUM_WORKERS = NUM_WORKERS_MAX if cpu_count > NUM_WORKERS_MAX else cpu_count
#: Seed for reproducibility
GLOBAL_SEED = 0

"""Hugging Face Config"""
#: Hugging Face name or organization
HF_ROOT = "mcity-data-engine"  # https://huggingface.co/mcity-data-engine
#: Determines whether model weights should be uploaded to Hugging Face
HF_DO_UPLOAD = False

"""Weights and Biases Config"""
#: Determines whether tracking with Weights and Biases is activated
WANDB_ACTIVE = True

"""Voxel51 Config"""
#: Address for Voxel51 connection
V51_ADDRESS = "localhost"
#: Port for Voxel51 connection
V51_PORT = 5151
#: Remote app sessions will listen to any connection to their ports
V51_REMOTE = True
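SELECTED_DATASET can restrict processing to a random subset via n_samples. A minimal sketch of how that selection could translate into a Voxel51 view, assuming the dataset has already been ingested under its name from config/datasets.yaml (load_selected is an illustrative helper, not engine code):

import fiftyone as fo

from config.config import GLOBAL_SEED, SELECTED_DATASET


def load_selected(selected=SELECTED_DATASET):
    """Load the configured dataset, optionally as a random subset."""
    dataset = fo.load_dataset(selected["name"])
    view = dataset.view()
    if selected["n_samples"] is not None:
        # Reproducible random subset of the requested size
        view = view.take(selected["n_samples"], seed=GLOBAL_SEED)
    return view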
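The ensemble_selection thresholds interact as follows: a detection survives only when at least agreement_threshold models produced boxes overlapping with IoU >= iou_threshold. The sketch below shows one plausible reading of that rule; the box format (relative [x, y, width, height], as used by Voxel51 detections) and both helpers are assumptions, not the engine's implementation.

def compute_iou(box_a, box_b):
    """IoU of two boxes in relative [x, y, width, height] format."""
    ax, ay, aw, ah = box_a
    bx, by, bw, bh = box_b
    inter_w = max(0.0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0.0, min(ay + ah, by + bh) - max(ay, by))
    inter = inter_w * inter_h
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0


def keep_detection(candidate_box, boxes_from_other_models,
                   iou_threshold=0.5, agreement_threshold=3):
    """Keep a detection if enough models produced an overlapping box.

    The candidate's own model counts toward the agreement tally.
    """
    n_agreeing = 1
    for box in boxes_from_other_models:
        if compute_iou(candidate_box, box) >= iou_threshold:
            n_agreeing += 1
    return n_agreeing >= agreement_threshold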
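In class_mapping, candidate_labels maps each generalized target class to its specific source classes. Note that "pickup" appears under both "Car" and "Truck"; ambiguities like this are presumably what the zero-shot classifiers and the confidence threshold resolve. A small illustrative helper (invert_mapping is hypothetical, not engine code) that inverts the mapping to show which targets each source class can map to:

CANDIDATE_LABELS = {
    "Car": ["car", "van", "pickup"],
    "Truck": ["truck", "pickup"],
    "Bike": ["motorbike/cycler"],
}


def invert_mapping(candidate_labels):
    """Return a source-class -> list-of-target-classes lookup."""
    source_to_targets = {}
    for target, sources in candidate_labels.items():
        for source in sources:
            source_to_targets.setdefault(source, []).append(target)
    return source_to_targets


print(invert_mapping(CANDIDATE_LABELS))
# {'car': ['Car'], 'van': ['Car'], 'pickup': ['Car', 'Truck'],
#  'truck': ['Truck'], 'motorbike/cycler': ['Bike']}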