utils.anomaly_detection_data_preparation
import logging
import os

import fiftyone as fo
from fiftyone import ViewField as F
from PIL import Image, ImageDraw

from config.config import WORKFLOWS
from utils.selector import select_by_class


class AnomalyDetectionDataPreparation:
    """Prepare datasets for anomaly detection.

    Normal data (samples without a rare class) becomes the training split,
    samples containing a rare class become the validation split, and binary
    masks marking the rare-class bounding boxes are generated for use with
    Anomalib.
    """

    # Datasets with a matching preparation method defined on this class.
    SUPPORTED_DATASETS = {"fisheye8k"}

    def __init__(
        self, dataset, dataset_name, export_root="output/datasets/", config=None
    ):
        """Initialize AnomalyDetectionDataPreparation object with dataset and configuration for data processing.

        Args:
            dataset: FiftyOne dataset to prepare.
            dataset_name: Dataset name; used both for the config lookup and to
                select the matching preparation method by substring match.
            export_root: Root directory for all exported data.
            config: Optional explicit config dict. Overrides the global
                WORKFLOWS entry (useful for testing).
        """
        self.dataset = dataset
        self.dataset_ano_dec = None  # set by the dataset-specific preparation method
        self.dataset_name = dataset_name
        self.export_root = export_root
        if config is not None:
            # Allow custom config for testing
            self.config = config
        else:
            self.config = WORKFLOWS["anomaly_detection"]["data_preparation"].get(
                self.dataset_name, None
            )
        if self.config is None:
            # Abort early: the preparation methods below read self.config and
            # would otherwise crash with an AttributeError.
            logging.error(
                f"Data preparation config for dataset {self.dataset_name} missing"
            )
            return

        # Dispatch to the preparation method named like the dataset.
        # Substring match allows for generalization to test datasets.
        supported_dataset_found = False
        for supported_dataset in self.SUPPORTED_DATASETS:
            if supported_dataset in self.dataset_name:
                supported_dataset_found = True
                method = getattr(self, supported_dataset)
                method()

        if not supported_dataset_found:
            logging.error(
                f"Dataset {self.dataset_name} is currently not supported for Anomaly Detection. Please prepare a workflow to prepare to define normality and a rare class."
            )

    @staticmethod
    def _create_binary_mask(sample, rare_classes, gt_field):
        """Render a black L-mode mask with white rectangles over each rare-class bounding box of the sample."""
        img_width = sample.metadata.width
        img_height = sample.metadata.height
        mask = Image.new("L", (img_width, img_height), 0)  # Create a black image
        draw = ImageDraw.Draw(mask)
        for bbox in sample[gt_field].detections:
            if bbox.label in rare_classes:
                # Convert V51 relative format [x, y, w, h] to absolute pixel
                # corner coordinates [x0, y0, x1, y1]
                x_min_rel, y_min_rel, width_rel, height_rel = bbox.bounding_box
                x_min = int(x_min_rel * img_width)
                y_min = int(y_min_rel * img_height)
                x_max = int((x_min_rel + width_rel) * img_width)
                y_max = int((y_min_rel + height_rel) * img_height)
                draw.rectangle([x_min, y_min, x_max, y_max], fill=255)
        return mask

    def fisheye8k(self):
        """Prepares Fisheye8K dataset for anomaly detection by filtering data from one camera, separating rare classes, and generating binary masks."""
        logging.info(
            f"Running anomaly detection data preparation for dataset {self.dataset_name}"
        )
        dataset_name_ano_dec = f"{self.dataset_name}_anomaly_detection"

        if dataset_name_ano_dec in fo.list_datasets():
            logging.warning(
                f"Dataset {self.dataset_name} was already prepared for anomaly detection. Skipping data export."
            )
            self.dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
        else:
            location_filter = self.config.get("location", "cam1")
            rare_classes = self.config.get("rare_classes", ["Truck"])
            gt_field = self.config.get("gt_field", "ground_truth")
            # Filter to only include data from one camera to make the data distribution clearer
            view_location = self.dataset.match(F("location") == location_filter)
            logging.info(
                f"Data pre-processing for the Fisheye8K dataset. Data from location {location_filter} is used, with {rare_classes} as the rare classes."
            )

            # Build training (normality) and validation (anomalies) views
            view_train = select_by_class(view_location, classes_out=rare_classes)
            view_val = select_by_class(view_location, classes_in=rare_classes)

            # Data export
            export_dir = os.path.join(self.export_root, dataset_name_ano_dec)

            # Use the configured ground-truth field (was hard-coded before)
            classes = self.dataset.distinct(f"{gt_field}.detections.label")
            dataset_splits = ["train", "val"]
            dataset_type = fo.types.YOLOv5Dataset

            view_train.export(
                export_dir=export_dir,
                dataset_type=dataset_type,
                label_field=gt_field,
                split=dataset_splits[0],
                classes=classes,
            )

            view_val.export(
                export_dir=export_dir,
                dataset_type=dataset_type,
                label_field=gt_field,
                split=dataset_splits[1],
                classes=classes,
            )

            # Load the exported dataset
            if dataset_name_ano_dec in fo.list_datasets():
                dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
                logging.info(f"Existing dataset {dataset_name_ano_dec} was loaded.")
            else:
                dataset_ano_dec = fo.Dataset(dataset_name_ano_dec)
                for split in dataset_splits:
                    dataset_ano_dec.add_dir(
                        dataset_dir=export_dir,
                        dataset_type=dataset_type,
                        split=split,
                        tags=split,
                    )
                # Masks need image dimensions, so make sure metadata is present
                dataset_ano_dec.compute_metadata()

            self.dataset_ano_dec = dataset_ano_dec

            # Select samples that include a rare class
            anomalous_view = dataset_ano_dec.match_tags("val", "test")
            logging.info(f"Processing {len(anomalous_view)} val samples")

            # Prepare ground-truth masks for Anomalib
            dataset_name_ano_dec_masks = f"{dataset_name_ano_dec}_masks"
            export_dir_masks = os.path.join(
                self.export_root, dataset_name_ano_dec_masks
            )
            os.makedirs(export_dir_masks, exist_ok=True)

            for sample in anomalous_view.iter_samples(progress=True):
                mask = self._create_binary_mask(sample, rare_classes, gt_field)
                # Save the mask as PNG next to-be-consumed by Anomalib.
                # splitext handles any source extension (.jpg, .jpeg, ...);
                # previously every mask was written to one fixed filename.
                stem, _ = os.path.splitext(os.path.basename(sample.filepath))
                mask.save(os.path.join(export_dir_masks, f"{stem}.png"))