config.config

import os
import psutil

#: Select one or more workflows from the 'WORKFLOWS' dictionary below
SELECTED_WORKFLOW = ["embedding_selection"]  # Choose from WORKFLOWS keys

#: Select dataset from config/datasets.yaml
SELECTED_DATASET = {
    "name": "fisheye8k",
    "n_samples": None,  # 'None' (full dataset) or 'int' (subset of the dataset)
    "custom_view": None,  # 'None' (full dataset) or a function from utils/custom_view
}
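
# Illustrative sketch (assumption, not engine code): a dataset loader could
# consume these settings roughly as follows, where 'load_dataset' is a
# hypothetical helper and 'take' is the Voxel51 view method for random subsets:
#
#   dataset = load_dataset(SELECTED_DATASET["name"])
#   if SELECTED_DATASET["n_samples"] is not None:
#       dataset = dataset.take(SELECTED_DATASET["n_samples"], seed=GLOBAL_SEED)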

#: Workflows and associated parameters
WORKFLOWS = {
    "aws_download": {
        "mcity": {
            "bucket": "mcity-data-engine",
            "prefix": "",
            "download_path": "output/datasets/annarbor_rolling",
            "test_run": True,
            "selected_dataset_overwrite": True,
        }
    },
    "embedding_selection": {
        "mode": "compute",  # "compute" or "load"
        "parameters": {
            "compute_representativeness": 0.99,
            "compute_unique_images_greedy": 0.01,
            "compute_unique_images_deterministic": 0.99,
            "compute_similar_images": 0.03,
            "neighbour_count": 3,
        },
        "embedding_models": [  # Select from V51 "Embeddings" models: https://docs.voxel51.com/model_zoo/models.html
            "clip-vit-base32-torch",
            # "open-clip-torch",
            # "dinov2-vits14-torch",
            # "dinov2-vits14-reg-torch",
            # "mobilenet-v2-imagenet-torch",
            # "resnet152-imagenet-torch",
            # "vgg19-imagenet-torch",
            # "classification-transformer-torch",
            # "detection-transformer-torch",
            # "zero-shot-detection-transformer-torch",
            # "zero-shot-classification-transformer-torch",
        ],
    },
    "anomaly_detection": {
        "mode": ["train", "inference"],  # "train" and "inference" supported
        "epochs": 12,
        "early_stop_patience": 5,
        "anomalib_image_models": {  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/models/image/index.html
            "Padim": {},
            # "EfficientAd": {},
            # "Draem": {},
            # "Cfa": {},
        },
        "anomalib_eval_metrics": [  # Choose from https://anomalib.readthedocs.io/en/v1.2.0/markdown/guides/reference/metrics/index.html. Focus on standard metrics; computing others can be expensive.
            "AUPR",
            "AUROC",
            "F1Max",
        ],
        "data_preparation": {"fisheye8k": {"location": "cam1", "rare_class": "Truck"}},
    },
    "auto_labeling": {
        "mode": ["train"],  # "train" and "inference" supported
        "model_source": [
            # "hf_models_objectdetection",
            "ultralytics",
            # "custom_codetr",
        ],
        "n_worker_dataloader": 8,
        "epochs": 1000,
        "early_stop_patience": 0,
        "early_stop_threshold": 0,
        "learning_rate": 5e-05,
        "weight_decay": 0.0001,
        "max_grad_norm": 0.01,
        "inference_settings": {
            "do_eval": True,
            "inference_on_test": True,
            "model_hf": None,  # None (automatic selection) or overwrite with Hugging Face ID. Assumes same model as selected below.
            "detection_threshold": 0.2,
        },
        "hf_models_objectdetection": {  # HF Leaderboard: https://huggingface.co/spaces/hf-vision/object_detection_leaderboard
            # "microsoft/conditional-detr-resnet-50": {"batch_size": 4},
            # "Omnifact/conditional-detr-resnet-101-dc5": {"batch_size": 1},
            # "facebook/detr-resnet-50": {"batch_size": 1},
            # "facebook/detr-resnet-50-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/detr-resnet-101": {"batch_size": 4, "image_size": [960, 960]},
            # "facebook/detr-resnet-101-dc5": {"batch_size": 1, "image_size": [960, 960]},
            # "facebook/deformable-detr-detic": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "facebook/deformable-detr-box-supervised": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "SenseTime/deformable-detr": {"batch_size": 4, "image_size": [960, 960]},
            # "SenseTime/deformable-detr-with-box-refine": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large": {
            #     "batch_size": 1,
            #     "image_size": [960, 960],
            # },
            # "jozhang97/deta-swin-large-o365": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
            # "hustvl/yolos-base": {"batch_size": 4},
            "IDEA-Research/dab-detr-resnet-50": {
                "batch_size": 4,
                "image_size": [960, 960],
            },
            # "PekingU/rtdetr_v2_r18vd": {
            #     "batch_size": 4,
            #     "image_size": [960, 960],
            # },
        },
        "custom_codetr": {
            "export_dataset_root": "output/datasets/codetr_data/",
            "configs": [
                "projects/configs/co_deformable_detr/co_deformable_detr_r50_1x_coco.py",
                "projects/configs/co_dino_vit/co_dino_5scale_vit_large_coco.py",
            ],
            "n_gpus": "1",
            "container_tool": "docker",
        },
        "ultralytics": {
            "export_dataset_root": "output/datasets/ultralytics_data/",
            "multi_scale": False,
            "cos_lr": True,
            "models": {  # Pick from https://docs.ultralytics.com/models/
                # "yolo11n": {"batch_size": 8, "img_size": 1280},
                # "yolo11x": {"batch_size": 1, "img_size": 960},
                "yolo12n": {"batch_size": 8, "img_size": 1280},
                # "yolo12x": {"batch_size": 1, "img_size": 960},
            },
        },
    },
    "auto_labeling_zero_shot": {
        "n_post_processing_worker_per_inference_worker": 5,
        "n_worker_dataloader": 3,
        "prefetch_factor_dataloader": 2,
        "hf_models_zeroshot_objectdetection": {
            "omlab/omdet-turbo-swin-tiny-hf": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=omlab%2Fomdet
                "batch_size": 1,
                "n_dataset_chunks": 1,  # Number of chunks to split the dataset into for parallel processing
            },
            "IDEA-Research/grounding-dino-tiny": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=IDEA-Research%2Fgrounding
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlvit-large-patch14": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlvit
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-base-patch16-finetuned": {  # https://huggingface.co/models?pipeline_tag=zero-shot-object-detection&sort=trending&search=google%2Fowlv2
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
            "google/owlv2-large-patch14-ensemble": {
                "batch_size": 1,
                "n_dataset_chunks": 1,
            },
        },
        "detection_threshold": 0.2,
        "object_classes": [
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "vehicle",
            "car",
            "bus",
            "truck",
            "taxi",
            "van",
            "pickup truck",
            "trailer",
            "emergency vehicle",
            "delivery driver",
        ],
    },
    "auto_label_mask": {
        "semantic_segmentation": {
            "sam2": {
                "prompt_field": None,
                "models": [
                    "segment-anything-2-hiera-tiny-image-torch",
                    "segment-anything-2-hiera-small-image-torch",
                    "segment-anything-2-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-tiny-image-torch",
                    "segment-anything-2.1-hiera-small-image-torch",
                    "segment-anything-2.1-hiera-base-plus-image-torch",
                    "segment-anything-2.1-hiera-large-image-torch",
                ],
            },
        },
        "depth_estimation": {
            "dpt": {
                "models": {
                    "Intel/dpt-swinv2-tiny-256",
                    "Intel/dpt-swinv2-large-384",
                    "Intel/dpt-beit-large-384",
                    "Intel/dpt-beit-large-512",
                    "Intel/dpt-large-ade",
                    "Intel/dpt-large",
                    "Intel/dpt-hybrid-midas",
                    "Intel/dpt-swinv2-base-384",
                    "Intel/dpt-beit-base-384",
                },
            },
            "depth_anything": {
                "models": {
                    "LiheYoung/depth-anything-base-hf",
                    "LiheYoung/depth-anything-large-hf",
                    "LiheYoung/depth-anything-small-hf",
                },
            },
            "depth_pro": {
                "models": {
                    "apple/DepthPro-hf",
                },
            },
            "glpn": {
                "models": {
                    "vinvino02/glpn-nyu",
                    "vinvino02/glpn-kitti",
                },
            },
            "zoe_depth": {
                "models": {
                    "Intel/zoedepth-nyu-kitti",
                    "Intel/zoedepth-nyu",
                    "Intel/zoedepth-kitti",
                },
            },
        },
    },
    "ensemble_selection": {
        "field_includes": "pred_zsod_",  # V51 field used for detections; "pred_zsod_" is the default for zero-shot object detection models
        "agreement_threshold": 3,  # Number of models that must agree on a detection
        "iou_threshold": 0.5,  # IoU threshold above which bboxes are considered overlapping
        "max_bbox_size": 0.01,  # Value in [0, 1] for the maximum size of considered bboxes
        "positive_classes": [  # Classes to consider; must be a subset of the classes available in the detections. Example for Vulnerable Road Users.
            "skater",
            "child",
            "bicycle",
            "bicyclist",
            "cyclist",
            "bike",
            "rider",
            "motorcycle",
            "motorcyclist",
            "pedestrian",
            "person",
            "walker",
            "jogger",
            "runner",
            "skateboarder",
            "scooter",
            "delivery driver",
        ],
    },
    "class_mapping": {
        # Source and target dataset names from config/datasets.yaml
        "dataset_source": "fisheye8k",
        "dataset_target": "mcity_fisheye_2000",
        # Set to True to change detection labels in the dataset; set to False to only add tags without changing labels.
        "change_labels": False,
        # Choose any number of models from 'hf_models_zeroshot_classification' below; to exclude a model from class mapping, comment it out.
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForZeroShotImageClassification
        "hf_models_zeroshot_classification": [
            "Salesforce/blip2-itm-vit-g",
            "openai/clip-vit-large-patch14",
            "google/siglip-so400m-patch14-384",
            # "google/siglip2-base-patch16-224",
            "kakaobrain/align-base",
            "BAAI/AltCLIP",
            "CIDAS/clipseg-rd64-refined",
        ],
        "thresholds": {"confidence": 0.2},
        "candidate_labels": {
            # Target class (generalized class): source classes (specific categories)
            "Car": ["car", "van", "pickup"],
            "Truck": ["truck", "pickup"],
            # One-to-one mapping
            "Bike": ["motorbike/cycler"],
            # Additional class mappings can be added here
        },
    },
}
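
# Illustrative sketch (assumption, not engine code): workflow dispatch could
# look up the parameters of each selected workflow like this:
#
#   for workflow_name in SELECTED_WORKFLOW:
#       workflow_config = WORKFLOWS[workflow_name]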

"""Global settings"""
#: Non-persistent datasets are deleted from the database each time the database is shut down
PERSISTENT = True
#: Accepted splits for data processing
ACCEPTED_SPLITS = ["train", "val", "test"]
cpu_count = len(psutil.Process().cpu_affinity())
#: Max. number of CPU workers
NUM_WORKERS_MAX = 32
#: Number of CPU workers, capped at NUM_WORKERS_MAX
NUM_WORKERS = min(cpu_count, NUM_WORKERS_MAX)
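# Illustrative usage (assumption, not engine code): NUM_WORKERS can cap the
# size of a worker pool, e.g. with the standard library; 'process_sample' and
# 'samples' are hypothetical:
#
#   from multiprocessing import Pool
#   with Pool(processes=NUM_WORKERS) as pool:
#       results = pool.map(process_sample, samples)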
#: Seed for reproducibility
GLOBAL_SEED = 0

"""Hugging Face Config"""
#: Hugging Face user or organization name
HF_ROOT = "mcity-data-engine"  # https://huggingface.co/mcity-data-engine
#: Determines if model weights should be uploaded to Hugging Face
HF_DO_UPLOAD = False

"""Weights and Biases Config"""
#: Determines if tracking with Weights and Biases is activated
WANDB_ACTIVE = True

"""Voxel51 Config"""
#: Address for Voxel51 connection
V51_ADDRESS = "localhost"
#: Port for Voxel51 connection
V51_PORT = 5151
#: Remote app sessions will listen to any connection to their ports
V51_REMOTE = True
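
A minimal sketch of overriding these settings for a quick experiment, assuming the module is importable as config.config (the override pattern is an illustration, not a documented engine feature):

    import config.config as cfg

    # Run the embedding selection workflow on a small random subset
    cfg.SELECTED_WORKFLOW = ["embedding_selection"]
    cfg.SELECTED_DATASET["n_samples"] = 100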