config.config
import os
import psutil

#: Select workflow list from 'WORKFLOWS = {...}' dictionary
SELECTED_WORKFLOW = ["auto_labeling"]  # Choose from WORKFLOWS keys

#: Select dataset from config/datasets.yaml
SELECTED_DATASET = {
    "name": "fisheye8k",
    "n_samples": None,  # 'None' (full dataset) or 'int' (subset of the dataset)
    "custom_view": None,  # 'None' (full dataset) or select function from utils/custom_view
}

#: Workflows and associated parameters
WORKFLOWS = {
    "aws_download": {
        "mcity": {
            "bucket": "mcity-data-engine",
            "prefix": "",
            "download_path": "output/datasets/annarbor_rolling",
            "test_run": True,
            "selected_dataset_overwrite": True,
        }
    },
    "embedding_selection": {
        "mode": "compute",  # "compute" or "load"
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
        "embedding_models": [  # Select from V51 "Embeddings" models: https://docs.voxel51.com/model_zoo/models.html
            "clip-vit-base32-torch",
            # "open-clip-torch",
            # "dinov2-vits14-torch",
            # "dinov2-vits14-reg-torch",
            # "mobilenet-v2-imagenet-torch",
            # "resnet152-imagenet-torch",
            # "vgg19-imagenet-torch",
            # "classification-transformer-torch",
            # "detection-transformer-torch",
            # "zero-shot-detection-transformer-torch",
            # "zero-shot-classification-transformer-torch",
        ],
    },
    "anomaly_detection": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "epochs": 12,
        "early_stop_patience": 5,
        "anomalib_image_models": {  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/models/image/index.html
            "Padim": {},
            # "EfficientAd": {},
            # "Draem": {},
            # "Cfa": {},
        },
        "anomalib_eval_metrics": [  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/metrics/index.html. Focus on standard metrics; computing others can be expensive
            "AUPR",
            "AUROC",
            "F1Max",
        ],
        "data_preparation": {"fisheye8k": {"location": "cam1", "rare_class": "Truck"}},
    },
    "auto_labeling": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "model_source": [
            # "hf_models_objectdetection",
            # "ultralytics",
            # "custom_codetr",
            "roboflow",
        ],
        "n_worker_dataloader": 8,
        "epochs": 1,
        "early_stop_patience": 0,
        "early_stop_threshold": 0,
        "learning_rate": 5e-05,
        "weight_decay": 0.0001,
        "max_grad_norm": 0.01,
        "inference_settings": {
            "do_eval": True,
            "inference_on_test": True,
            "model_hf": None,  # None (automatic selection) or overwrite with a Hugging Face ID. Assumes the same model as selected below.
            "detection_threshold": 0.2,
        },
        "hf_models_objectdetection": {  # HF Leaderboard: https://huggingface.co/spaces/hf-vision/object_detection_leaderboard
            # "microsoft/conditional-detr-resnet-50": {"batch_size": 4},
            # "Omnifact/conditional-detr-resnet-101-dc5": {"batch_size": 1},
            # "facebook/detr-resnet-50": {"batch_size": 1},
            # "facebook/detr-resnet-50-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/detr-resnet-101": {"batch_size": 4, "image_size": [960, 960]},
            # "facebook/detr-resnet-101-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/deformable-detr-detic": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "facebook/deformable-detr-box-supervised": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "SenseTime/deformable-detr": {"batch_size": 4, "image_size": [960, 960]},
            # "SenseTime/deformable-detr-with-box-refine": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large-o365": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "hustvl/yolos-base": {"batch_size": 4},
            "IDEA-Research/dab-detr-resnet-50": {
                "batch_size": 4,
                "image_size": [960, 960],
            },
            # "PekingU/rtdetr_v2_r18vd": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
        },
        "custom_codetr": {
            "export_dataset_root": "output/datasets/codetr_data/",
            "configs": [
                "projects/configs/co_deformable_detr/co_deformable_detr_r50_1x_coco.py",
                "projects/configs/co_dino_vit/co_dino_5scale_vit_large_coco.py",
            ],
            "n_gpus": "1",
            "container_tool": "docker",
        },
        "roboflow": {  # Roboflow RF-DETR configuration
            "export_dataset_root": "output/datasets/roboflow_data/",
            "configs": [
                "rfdetr_nano",
                "rfdetr_small",
                # "rfdetr_medium",
                # "rfdetr_large",
            ],
            # RF-DETR-specific parameters only
            "batch_size": 4,  # Override default batch size
            "grad_accum_steps": 4,  # Gradient accumulation steps
            "lr_encoder": None,  # Encoder-specific learning rate (optional)
            "resolution": None,  # Image resolution, must be divisible by 56 (optional)
            "use_ema": True,  # Exponential moving average
            "gradient_checkpointing": False,  # Memory optimization
            "early_stopping_min_delta": 0.001,  # Minimum improvement for early stopping
            "early_stopping_use_ema": True,  # Use EMA model for early stopping
        },
        "ultralytics": {
            "export_dataset_root": "output/datasets/ultralytics_data/",
            "multi_scale": False,
            "cos_lr": True,
            "models": {  # Pick from https://docs.ultralytics.com/models/
                # "yolo11n": {"batch_size": 8, "img_size": 1280},
                # "yolo11x": {"batch_size": 1, "img_size": 960},
                "yolo12n": {"batch_size": 8, "img_size": 1280},
                # "yolo12x": {"batch_size": 1, "img_size": 960},
            },
        },
    },
    "auto_labeling_zero_shot": {
        "n_post_processing_worker_per_inference_worker": 5,
        "n_worker_dataloader": 3,
        "prefetch_factor_dataloader": 2,
        "hf_models_zeroshot_objectdetection": {
            "omlab/omdet-turbo-swin-tiny-hf": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=omlab%2Fomdet
                "batch_size": 1,
                "n_dataset_chunks": 1,  # Number of chunks to split the dataset into for parallel processing
            },
            "IDEA-Research/grounding-dino-tiny": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=IDEA-Research%2Fgrounding
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlvit-large-patch14": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlvit
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-base-patch16-finetuned": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlv2
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-large-patch14-ensemble": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
        },
        "detection_threshold": 0.2,
        "object_classes": [
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "vehicle",
            "car",
            "bus",
            "truck",
            "taxi",
            "van",
            "pickup truck",
            "trailer",
            "emergency vehicle",
            "delivery driver",
        ],
    },
    "auto_label_mask": {
        "semantic_segmentation": {
            "sam2": {
                "prompt_field": None,
                "models": [
                    "segment-anything-2-hiera-tiny-image-torch",
                    "segment-anything-2-hiera-small-image-torch",
                    "segment-anything-2-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-tiny-image-torch",
                    "segment-anything-2.1-hiera-small-image-torch",
                    "segment-anything-2.1-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-large-image-torch",
                ],
            },
        },
        "depth_estimation": {
            "dpt": {
                "models": {
                    "Intel/dpt-swinv2-tiny-256",
                    "Intel/dpt-swinv2-large-384",
                    "Intel/dpt-beit-large-384",
                    "Intel/dpt-beit-large-512",
                    "Intel/dpt-large-ade",
                    "Intel/dpt-large",
                    "Intel/dpt-hybrid-midas",
                    "Intel/dpt-swinv2-base-384",
                    "Intel/dpt-beit-base-384",
                },
            },
            "depth_anything": {
                "models": {
                    "LiheYoung/depth-anything-base-hf",
                    "LiheYoung/depth-anything-large-hf",
                    "LiheYoung/depth-anything-small-hf",
                },
            },
            "depth_pro": {
                "models": {
                    "apple/DepthPro-hf",
                }
            },
            "glpn": {
                "models": {
                    "vinvino02/glpn-nyu",
                    "vinvino02/glpn-kitti",
                },
            },
            "zoe_depth": {
                "models": {
                    "Intel/zoedepth-nyu-kitti",
                    "Intel/zoedepth-nyu",
                    "Intel/zoedepth-kitti",
                },
            },
        },
    },
    "ensemble_selection": {
        "field_includes": "pred_zsod_",  # V51 field used for detections; "pred_zsod_" is the default for zero-shot object detection models
        "agreement_threshold": 3,  # Minimum number of models that must agree on a detection
        "iou_threshold": 0.5,  # IoU threshold above which bboxes are considered overlapping
        "max_bbox_size": 0.01,  # Value in [0, 1] for the max size of considered bboxes
        "positive_classes": [  # Classes to consider; must be a subset of the classes available in the detections. The example below targets Vulnerable Road Users.
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "delivery driver",
        ],
    },
    "class_mapping": {
        # Get the source and target dataset names from datasets.yaml
        "dataset_source": "fisheye8k",
        "dataset_target": "mcity_fisheye_2000",
        # True: change detection labels in the dataset. False: only add tags, leaving labels unchanged.
        "change_labels": False,

        # Choose any number of models from hf_models_zeroshot_classification below; to exclude a model from class mapping, comment it out.
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForZeroShotImageClassification
        "hf_models_zeroshot_classification": [
            "Salesforce/blip2-itm-vit-g",
            "openai/clip-vit-large-patch14",
            "google/siglip-so400m-patch14-384",
            # "google/siglip2-base-patch16-224",
            "kakaobrain/align-base",
            "BAAI/AltCLIP",
            "CIDAS/clipseg-rd64-refined",
        ],
        "thresholds": {"confidence": 0.2},
        "candidate_labels": {
            # Target class (generalized class): source classes (specific categories)
            "Car": ["car", "van", "pickup"],
            "Truck": ["truck", "pickup"],
            # One-to-one mapping
            "Bike": ["motorbike/cycler"],
            # Additional class mappings can be added here
        },
    },
    "data_ingest": {
        "dataset_name": "custom_data",
        "annotation_format": "auto",  # Options: "auto", "coco", "voc", "yolo", "image_only", "video"
        "dataset_dir": "/home/dataengine/Downloads/vid",
        "split_percentages": [0.7, 0.15, 0.15],  # Optional train/val/test
        "fps": 2,  # Frames per second used when converting a video dataset to a FiftyOne image dataset
    },
}

"""Global settings"""
#: Non-persistent datasets are deleted from the database each time the database is shut down
PERSISTENT = True
#: Accepted splits for data processing
ACCEPTED_SPLITS = ["train", "val", "test"]
cpu_count = len(psutil.Process().cpu_affinity())
#: Max. number of CPU workers
NUM_WORKERS_MAX = 32
NUM_WORKERS = NUM_WORKERS_MAX if cpu_count > NUM_WORKERS_MAX else cpu_count
#: Seed for reproducibility
GLOBAL_SEED = 0

"""Hugging Face Config"""
#: Hugging Face name or organization
HF_ROOT = "mcity-data-engine"  # https://huggingface.co/mcity-data-engine
#: Determines whether model weights should be uploaded to Hugging Face
HF_DO_UPLOAD = False

"""Weights and Biases Config"""
#: Determines whether tracking with Weights and Biases is activated
WANDB_ACTIVE = True

"""Voxel51 Config"""
#: Address for Voxel51 connection
V51_ADDRESS = "localhost"
#: Port for Voxel51 connection
V51_PORT = 5151
#: Remote app sessions will listen to any connection to their ports
V51_REMOTE = True
SELECTED_WORKFLOW =
['auto_labeling']
SELECTED_DATASET =
{'name': 'fisheye8k', 'n_samples': None, 'custom_view': None}
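The n_samples field allows running any workflow on a random subset of the dataset; one plausible reading of this dict with the FiftyOne API (the load/subset step is an assumption about how the engine uses it, not code from this module):

import fiftyone as fo
from config.config import GLOBAL_SEED, SELECTED_DATASET

dataset = fo.load_dataset(SELECTED_DATASET["name"])  # assumes the dataset was already ingested
view = dataset.view()
if SELECTED_DATASET["n_samples"] is not None:
    # take() draws a random subset; a fixed seed keeps it reproducible
    view = dataset.take(SELECTED_DATASET["n_samples"], seed=GLOBAL_SEED)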
WORKFLOWS =
{'aws_download': {'mcity': {'bucket': 'mcity-data-engine', 'prefix': '', 'download_path': 'output/datasets/annarbor_rolling', 'test_run': True, 'selected_dataset_overwrite': True}}, 'embedding_selection': {'mode': 'compute', 'parameters': {'compute_representativeness': 0.99, 'compute_unique_images_greedy': 0.01, 'compute_unique_images_deterministic': 0.99, 'compute_similar_images': 0.03, 'neighbour_count': 3}, 'embedding_models': ['clip-vit-base32-torch']}, 'anomaly_detection': {'mode': ['train', 'inference'], 'epochs': 12, 'early_stop_patience': 5, 'anomalib_image_models': {'Padim': {}}, 'anomalib_eval_metrics': ['AUPR', 'AUROC', 'F1Max'], 'data_preparation': {'fisheye8k': {'location': 'cam1', 'rare_class': 'Truck'}}}, 'auto_labeling': {'mode': ['train', 'inference'], 'model_source': ['roboflow'], 'n_worker_dataloader': 8, 'epochs': 1, 'early_stop_patience': 0, 'early_stop_threshold': 0, 'learning_rate': 5e-05, 'weight_decay': 0.0001, 'max_grad_norm': 0.01, 'inference_settings': {'do_eval': True, 'inference_on_test': True, 'model_hf': None, 'detection_threshold': 0.2}, 'hf_models_objectdetection': {'IDEA-Research/dab-detr-resnet-50': {'batch_size': 4, 'image_size': [960, 960]}}, 'custom_codetr': {'export_dataset_root': 'output/datasets/codetr_data/', 'configs': ['projects/configs/co_deformable_detr/co_deformable_detr_r50_1x_coco.py', 'projects/configs/co_dino_vit/co_dino_5scale_vit_large_coco.py'], 'n_gpus': '1', 'container_tool': 'docker'}, 'roboflow': {'export_dataset_root': 'output/datasets/roboflow_data/', 'configs': ['rfdetr_nano', 'rfdetr_small'], 'batch_size': 4, 'grad_accum_steps': 4, 'lr_encoder': None, 'resolution': None, 'use_ema': True, 'gradient_checkpointing': False, 'early_stopping_min_delta': 0.001, 'early_stopping_use_ema': True}, 'ultralytics': {'export_dataset_root': 'output/datasets/ultralytics_data/', 'multi_scale': False, 'cos_lr': True, 'models': {'yolo12n': {'batch_size': 8, 'img_size': 1280}}}}, 'auto_labeling_zero_shot': {'n_post_processing_worker_per_inference_worker': 5, 'n_worker_dataloader': 3, 'prefetch_factor_dataloader': 2, 'hf_models_zeroshot_objectdetection': {'omlab/omdet-turbo-swin-tiny-hf': {'batch_size': 1, 'n_dataset_chunks': 1}, 'IDEA-Research/grounding-dino-tiny': {'batch_size': 1, 'n_dataset_chunks': 1}, 'google/owlvit-large-patch14': {'batch_size': 1, 'n_dataset_chunks': 1}, 'google/owlv2-base-patch16-finetuned': {'batch_size': 1, 'n_dataset_chunks': 1}, 'google/owlv2-large-patch14-ensemble': {'batch_size': 1, 'n_dataset_chunks': 1}}, 'detection_threshold': 0.2, 'object_classes': ['skater', 'child', 'bicycle', 'bicyclist', 'cyclist', 'bike', 'rider', 'motorcycle', 'motorcyclist', 'pedestrian', 'person', 'walker', 'jogger', 'runner', 'skateboarder', 'scooter', 'vehicle', 'car', 'bus', 'truck', 'taxi', 'van', 'pickup truck', 'trailer', 'emergency vehicle', 'delivery driver']}, 'auto_label_mask': {'semantic_segmentation': {'sam2': {'prompt_field': None, 'models': ['segment-anything-2-hiera-tiny-image-torch', 'segment-anything-2-hiera-small-image-torch', 'segment-anything-2-hiera-base-plus-image-torch', 'segment-anything-2.1-hiera-tiny-image-torch', 'segment-anything-2.1-hiera-small-image-torch', 'segment-anything-2.1-hiera-base-plus-image-torch', 'segment-anything-2.1-hiera-large-image-torch']}}, 'depth_estimation': {'dpt': {'models': {'Intel/dpt-beit-base-384', 'Intel/dpt-hybrid-midas', 'Intel/dpt-beit-large-512', 'Intel/dpt-large', 'Intel/dpt-swinv2-base-384', 'Intel/dpt-swinv2-large-384', 'Intel/dpt-beit-large-384', 'Intel/dpt-large-ade', 'Intel/dpt-swinv2-tiny-256'}}, 'depth_anything': {'models': {'LiheYoung/depth-anything-large-hf', 'LiheYoung/depth-anything-small-hf', 'LiheYoung/depth-anything-base-hf'}}, 'depth_pro': {'models': {'apple/DepthPro-hf'}}, 'glpn': {'models': {'vinvino02/glpn-nyu', 'vinvino02/glpn-kitti'}}, 'zoe_depth': {'models': {'Intel/zoedepth-kitti', 'Intel/zoedepth-nyu-kitti', 'Intel/zoedepth-nyu'}}}}, 'ensemble_selection': {'field_includes': 'pred_zsod_', 'agreement_threshold': 3, 'iou_threshold': 0.5, 'max_bbox_size': 0.01, 'positive_classes': ['skater', 'child', 'bicycle', 'bicyclist', 'cyclist', 'bike', 'rider', 'motorcycle', 'motorcyclist', 'pedestrian', 'person', 'walker', 'jogger', 'runner', 'skateboarder', 'scooter', 'delivery driver']}, 'class_mapping': {'dataset_source': 'fisheye8k', 'dataset_target': 'mcity_fisheye_2000', 'change_labels': False, 'hf_models_zeroshot_classification': ['Salesforce/blip2-itm-vit-g', 'openai/clip-vit-large-patch14', 'google/siglip-so400m-patch14-384', 'kakaobrain/align-base', 'BAAI/AltCLIP', 'CIDAS/clipseg-rd64-refined'], 'thresholds': {'confidence': 0.2}, 'candidate_labels': {'Car': ['car', 'van', 'pickup'], 'Truck': ['truck', 'pickup'], 'Bike': ['motorbike/cycler']}}, 'data_ingest': {'dataset_name': 'custom_data', 'annotation_format': 'auto', 'dataset_dir': '/home/dataengine/Downloads/vid', 'split_percentages': [0.7, 0.15, 0.15], 'fps': 2}}
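The ensemble_selection thresholds are easiest to read as a filter: a detection survives if it is small enough and enough models produced an overlapping box. A self-contained sketch follows (hypothetical helper, not the engine's implementation; treating max_bbox_size as relative area is an assumption) using relative [x, y, w, h] boxes:

def iou(a, b):
    # Intersection-over-union of two [x, y, w, h] boxes in relative coordinates
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2 = min(a[0] + a[2], b[0] + b[2])
    y2 = min(a[1] + a[3], b[1] + b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

def keep_detection(box, boxes_per_model, cfg):
    if box[2] * box[3] > cfg["max_bbox_size"]:  # drop oversized boxes
        return False
    # Count models with at least one sufficiently overlapping box;
    # the detection's own model counts toward the tally
    agreeing = sum(
        any(iou(box, other) >= cfg["iou_threshold"] for other in boxes)
        for boxes in boxes_per_model
    )
    return agreeing >= cfg["agreement_threshold"]

cfg = {"agreement_threshold": 3, "iou_threshold": 0.5, "max_bbox_size": 0.01}
box = [0.10, 0.10, 0.05, 0.05]
per_model = [[box], [[0.11, 0.10, 0.05, 0.05]], [[0.10, 0.11, 0.05, 0.05]]]
print(keep_detection(box, per_model, cfg))  # True: 3 of 3 models agree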
Global settings
PERSISTENT =
True
ACCEPTED_SPLITS =
['train', 'val', 'test']
cpu_count =
4
NUM_WORKERS_MAX =
32
NUM_WORKERS =
4
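NUM_WORKERS resolves to min(cpu_count, NUM_WORKERS_MAX); the value 4 above simply reflects the CPU affinity of the machine these docs were built on. A minimal usage sketch (stand-in dataset; feeding this into a PyTorch DataLoader is an assumption about how the workflows use it):

import torch
from torch.utils.data import DataLoader, TensorDataset

from config.config import NUM_WORKERS

dataset = TensorDataset(torch.zeros(16, 3, 64, 64))  # stand-in data
loader = DataLoader(dataset, batch_size=4, num_workers=NUM_WORKERS)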
GLOBAL_SEED =
0
Hugging Face Config
HF_ROOT =
'mcity-data-engine'
HF_DO_UPLOAD =
False
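HF_ROOT is the namespace that uploaded weights would live under; a sketch of how the flag and root plausibly combine (repo name and folder path below are hypothetical):

from huggingface_hub import upload_folder

from config.config import HF_DO_UPLOAD, HF_ROOT

if HF_DO_UPLOAD:
    # Pushes a local weights folder to e.g. mcity-data-engine/<model-name>
    upload_folder(repo_id=f"{HF_ROOT}/my-model", folder_path="output/weights/my-model")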
Weights and Biases Config
WANDB_ACTIVE =
True
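A sketch of how such a flag is typically honored (the project name is hypothetical):

import wandb

from config.config import WANDB_ACTIVE

if WANDB_ACTIVE:
    run = wandb.init(project="mcity-data-engine", config={"seed": 0})
    run.finish()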
Voxel51 Config
V51_ADDRESS =
'localhost'
V51_PORT =
5151
V51_REMOTE =
True
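These three values map directly onto FiftyOne's app launcher; a minimal sketch (assuming the selected dataset already exists in the database):

import fiftyone as fo

from config.config import V51_ADDRESS, V51_PORT, V51_REMOTE

dataset = fo.load_dataset("fisheye8k")  # assumes prior ingest
session = fo.launch_app(dataset, address=V51_ADDRESS, port=V51_PORT, remote=V51_REMOTE)
session.wait()  # keep the session alive until interrupted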