utils.anomaly_detection_data_preparation

  1import logging
  2import os
  3
  4import fiftyone as fo
  5from fiftyone import ViewField as F
  6from PIL import Image, ImageDraw
  7
  8from config.config import WORKFLOWS
  9from utils.selector import select_by_class
 10
 11
 12class AnomalyDetectionDataPreparation:
 13    """Class to prepare datasets for anomaly detection by separating normal from rare class data and creating binary masks for anomalies."""
 14
 15    def __init__(
 16        self, dataset, dataset_name, export_root="output/datasets/", config=None
 17    ):
 18        """Initialize AnomalyDetectionDataPreparation object with dataset and configuration for data processing."""
 19        self.dataset = dataset
 20        self.dataset_ano_dec = None
 21        self.dataset_name = dataset_name
 22        self.export_root = export_root
 23        if config is not None:
 24            # Allow custom config for testing
 25            self.config = config
 26        else:
 27            self.config = WORKFLOWS["anomaly_detection"]["data_preparation"].get(
 28                self.dataset_name, None
 29            )
 30        if self.config is None:
 31            logging.error(
 32                f"Data preparation config for dataset {self.dataset_name} missing"
 33            )
 34
 35        SUPPORTED_DATASETS = {"fisheye8k"}
 36
 37        supported_dataset_found = False
 38        for dataset in SUPPORTED_DATASETS:
 39            if (
 40                dataset in self.dataset_name
 41            ):  # Allow for generalization for test datasets
 42                # Call method that is named like dataset
 43                supported_dataset_found = True
 44                method = getattr(self, dataset)
 45                method()
 46
 47        if supported_dataset_found == False:
 48            logging.error(
 49                f"Dataset {self.dataset_name} is currently not supported for Anomaly Detection. Please prepare a workflow to prepare to define normality and a rare class."
 50            )
 51            return None
 52
 53    def fisheye8k(self):
 54        """Prepares Fisheye8K dataset for anomaly detection by filtering data from one camera, separating rare classes, and generating binary masks."""
 55        logging.info(
 56            f"Running anomaly detection data preparation for dataset {self.dataset_name}"
 57        )
 58        dataset_name_ano_dec = f"{self.dataset_name}_anomaly_detection"
 59
 60        if dataset_name_ano_dec in fo.list_datasets():
 61            logging.warning(
 62                f"Dataset {self.dataset_name} was already prepared for anomaly detection. Skipping data export."
 63            )
 64            self.dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
 65        else:
 66            location_filter = self.config.get("location", "cam1")
 67            rare_classes = self.config.get("rare_classes", ["Truck"])
 68            gt_field = self.config.get("gt_field", "ground_truth")
 69            # Filter to only include data from one camera to make the data distribution clearer
 70            view_location = self.dataset.match(F("location") == location_filter)
 71            logging.info(
 72                f"Data pre-processing for the Fisheye8K dataset. Data from location {location_filter} is used, with {rare_classes} as the rare classes."
 73            )
 74
 75            # Build training and validation datasets
 76            view_train = select_by_class(view_location, classes_out=rare_classes)
 77            view_val = select_by_class(view_location, classes_in=rare_classes)
 78
 79            # Data export
 80            export_dir = os.path.join(self.export_root, dataset_name_ano_dec)
 81
 82            classes = self.dataset.distinct("ground_truth.detections.label")
 83            dataset_splits = ["train", "val"]
 84            dataset_type = fo.types.YOLOv5Dataset
 85
 86            view_train.export(
 87                export_dir=export_dir,
 88                dataset_type=dataset_type,
 89                label_field=gt_field,
 90                split=dataset_splits[0],
 91                classes=classes,
 92            )
 93
 94            view_val.export(
 95                export_dir=export_dir,
 96                dataset_type=dataset_type,
 97                label_field=gt_field,
 98                split=dataset_splits[1],
 99                classes=classes,
100            )
101
102            # Load the exported dataset
103            if dataset_name_ano_dec in fo.list_datasets():
104                dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
105                logging.info(f"Existing dataset {dataset_name_ano_dec} was loaded.")
106            else:
107                dataset_ano_dec = fo.Dataset(dataset_name_ano_dec)
108                for split in dataset_splits:
109                    dataset_ano_dec.add_dir(
110                        dataset_dir=export_dir,
111                        dataset_type=dataset_type,
112                        split=split,
113                        tags=split,
114                    )
115                dataset_ano_dec.compute_metadata()
116
117            self.dataset_ano_dec = dataset_ano_dec
118
119            # Select samples that include a rare class
120            anomalous_view = dataset_ano_dec.match_tags("val", "test")
121            logging.info(f"Processing {len(anomalous_view)} val samples")
122
123            # Prepare data for Anomalib
124            dataset_name_ano_dec_masks = f"{dataset_name_ano_dec}_masks"
125            export_dir_masks = os.path.join(
126                self.export_root, dataset_name_ano_dec_masks
127            )
128            os.makedirs(export_dir_masks, exist_ok=True)
129
130            for sample in anomalous_view.iter_samples(progress=True):
131                img_width = sample.metadata.width
132                img_height = sample.metadata.height
133                mask = Image.new(
134                    "L", (img_width, img_height), 0
135                )  # Create a black image
136                draw = ImageDraw.Draw(mask)
137                for bbox in sample.ground_truth.detections:
138                    if bbox.label in rare_classes:
139                        # Convert V51 format to image format
140
141                        x_min_rel, y_min_rel, width_rel, height_rel = bbox.bounding_box
142                        x_min = int(x_min_rel * img_width)
143                        y_min = int(y_min_rel * img_height)
144                        x_max = int((x_min_rel + width_rel) * img_width)
145                        y_max = int((y_min_rel + height_rel) * img_height)
146
147                        # draw.rectangle([x0, y0, x1, y1], fill=255)  # [x0, y0, x1, y1]
148                        draw.rectangle(
149                            [x_min, y_min, x_max, y_max], fill=255
150                        )  # [x0, y0, x1, y1]
151
152                # Save the mask
153                filename = os.path.basename(sample.filepath).replace(".jpg", ".png")
154                mask.save(os.path.join(export_dir_masks, f"{filename}"))
class AnomalyDetectionDataPreparation:
 13class AnomalyDetectionDataPreparation:
 14    """Class to prepare datasets for anomaly detection by separating normal from rare class data and creating binary masks for anomalies."""
 15
 16    def __init__(
 17        self, dataset, dataset_name, export_root="output/datasets/", config=None
 18    ):
 19        """Initialize AnomalyDetectionDataPreparation object with dataset and configuration for data processing."""
 20        self.dataset = dataset
 21        self.dataset_ano_dec = None
 22        self.dataset_name = dataset_name
 23        self.export_root = export_root
 24        if config is not None:
 25            # Allow custom config for testing
 26            self.config = config
 27        else:
 28            self.config = WORKFLOWS["anomaly_detection"]["data_preparation"].get(
 29                self.dataset_name, None
 30            )
 31        if self.config is None:
 32            logging.error(
 33                f"Data preparation config for dataset {self.dataset_name} missing"
 34            )
 35
 36        SUPPORTED_DATASETS = {"fisheye8k"}
 37
 38        supported_dataset_found = False
 39        for dataset in SUPPORTED_DATASETS:
 40            if (
 41                dataset in self.dataset_name
 42            ):  # Allow for generalization for test datasets
 43                # Call method that is named like dataset
 44                supported_dataset_found = True
 45                method = getattr(self, dataset)
 46                method()
 47
 48        if supported_dataset_found == False:
 49            logging.error(
 50                f"Dataset {self.dataset_name} is currently not supported for Anomaly Detection. Please prepare a workflow to prepare to define normality and a rare class."
 51            )
 52            return None
 53
 54    def fisheye8k(self):
 55        """Prepares Fisheye8K dataset for anomaly detection by filtering data from one camera, separating rare classes, and generating binary masks."""
 56        logging.info(
 57            f"Running anomaly detection data preparation for dataset {self.dataset_name}"
 58        )
 59        dataset_name_ano_dec = f"{self.dataset_name}_anomaly_detection"
 60
 61        if dataset_name_ano_dec in fo.list_datasets():
 62            logging.warning(
 63                f"Dataset {self.dataset_name} was already prepared for anomaly detection. Skipping data export."
 64            )
 65            self.dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
 66        else:
 67            location_filter = self.config.get("location", "cam1")
 68            rare_classes = self.config.get("rare_classes", ["Truck"])
 69            gt_field = self.config.get("gt_field", "ground_truth")
 70            # Filter to only include data from one camera to make the data distribution clearer
 71            view_location = self.dataset.match(F("location") == location_filter)
 72            logging.info(
 73                f"Data pre-processing for the Fisheye8K dataset. Data from location {location_filter} is used, with {rare_classes} as the rare classes."
 74            )
 75
 76            # Build training and validation datasets
 77            view_train = select_by_class(view_location, classes_out=rare_classes)
 78            view_val = select_by_class(view_location, classes_in=rare_classes)
 79
 80            # Data export
 81            export_dir = os.path.join(self.export_root, dataset_name_ano_dec)
 82
 83            classes = self.dataset.distinct("ground_truth.detections.label")
 84            dataset_splits = ["train", "val"]
 85            dataset_type = fo.types.YOLOv5Dataset
 86
 87            view_train.export(
 88                export_dir=export_dir,
 89                dataset_type=dataset_type,
 90                label_field=gt_field,
 91                split=dataset_splits[0],
 92                classes=classes,
 93            )
 94
 95            view_val.export(
 96                export_dir=export_dir,
 97                dataset_type=dataset_type,
 98                label_field=gt_field,
 99                split=dataset_splits[1],
100                classes=classes,
101            )
102
103            # Load the exported dataset
104            if dataset_name_ano_dec in fo.list_datasets():
105                dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
106                logging.info(f"Existing dataset {dataset_name_ano_dec} was loaded.")
107            else:
108                dataset_ano_dec = fo.Dataset(dataset_name_ano_dec)
109                for split in dataset_splits:
110                    dataset_ano_dec.add_dir(
111                        dataset_dir=export_dir,
112                        dataset_type=dataset_type,
113                        split=split,
114                        tags=split,
115                    )
116                dataset_ano_dec.compute_metadata()
117
118            self.dataset_ano_dec = dataset_ano_dec
119
120            # Select samples that include a rare class
121            anomalous_view = dataset_ano_dec.match_tags("val", "test")
122            logging.info(f"Processing {len(anomalous_view)} val samples")
123
124            # Prepare data for Anomalib
125            dataset_name_ano_dec_masks = f"{dataset_name_ano_dec}_masks"
126            export_dir_masks = os.path.join(
127                self.export_root, dataset_name_ano_dec_masks
128            )
129            os.makedirs(export_dir_masks, exist_ok=True)
130
131            for sample in anomalous_view.iter_samples(progress=True):
132                img_width = sample.metadata.width
133                img_height = sample.metadata.height
134                mask = Image.new(
135                    "L", (img_width, img_height), 0
136                )  # Create a black image
137                draw = ImageDraw.Draw(mask)
138                for bbox in sample.ground_truth.detections:
139                    if bbox.label in rare_classes:
140                        # Convert V51 format to image format
141
142                        x_min_rel, y_min_rel, width_rel, height_rel = bbox.bounding_box
143                        x_min = int(x_min_rel * img_width)
144                        y_min = int(y_min_rel * img_height)
145                        x_max = int((x_min_rel + width_rel) * img_width)
146                        y_max = int((y_min_rel + height_rel) * img_height)
147
148                        # draw.rectangle([x0, y0, x1, y1], fill=255)  # [x0, y0, x1, y1]
149                        draw.rectangle(
150                            [x_min, y_min, x_max, y_max], fill=255
151                        )  # [x0, y0, x1, y1]
152
153                # Save the mask
154                filename = os.path.basename(sample.filepath).replace(".jpg", ".png")
155                mask.save(os.path.join(export_dir_masks, f"{filename}"))

Class to prepare datasets for anomaly detection by separating normal from rare class data and creating binary masks for anomalies.

AnomalyDetectionDataPreparation(dataset, dataset_name, export_root='output/datasets/', config=None)
16    def __init__(
17        self, dataset, dataset_name, export_root="output/datasets/", config=None
18    ):
19        """Initialize AnomalyDetectionDataPreparation object with dataset and configuration for data processing."""
20        self.dataset = dataset
21        self.dataset_ano_dec = None
22        self.dataset_name = dataset_name
23        self.export_root = export_root
24        if config is not None:
25            # Allow custom config for testing
26            self.config = config
27        else:
28            self.config = WORKFLOWS["anomaly_detection"]["data_preparation"].get(
29                self.dataset_name, None
30            )
31        if self.config is None:
32            logging.error(
33                f"Data preparation config for dataset {self.dataset_name} missing"
34            )
35
36        SUPPORTED_DATASETS = {"fisheye8k"}
37
38        supported_dataset_found = False
39        for dataset in SUPPORTED_DATASETS:
40            if (
41                dataset in self.dataset_name
42            ):  # Allow for generalization for test datasets
43                # Call method that is named like dataset
44                supported_dataset_found = True
45                method = getattr(self, dataset)
46                method()
47
48        if supported_dataset_found == False:
49            logging.error(
50                f"Dataset {self.dataset_name} is currently not supported for Anomaly Detection. Please prepare a workflow to prepare to define normality and a rare class."
51            )
52            return None

Initialize AnomalyDetectionDataPreparation object with dataset and configuration for data processing.

dataset
dataset_ano_dec
dataset_name
export_root
def fisheye8k(self):
 54    def fisheye8k(self):
 55        """Prepares Fisheye8K dataset for anomaly detection by filtering data from one camera, separating rare classes, and generating binary masks."""
 56        logging.info(
 57            f"Running anomaly detection data preparation for dataset {self.dataset_name}"
 58        )
 59        dataset_name_ano_dec = f"{self.dataset_name}_anomaly_detection"
 60
 61        if dataset_name_ano_dec in fo.list_datasets():
 62            logging.warning(
 63                f"Dataset {self.dataset_name} was already prepared for anomaly detection. Skipping data export."
 64            )
 65            self.dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
 66        else:
 67            location_filter = self.config.get("location", "cam1")
 68            rare_classes = self.config.get("rare_classes", ["Truck"])
 69            gt_field = self.config.get("gt_field", "ground_truth")
 70            # Filter to only include data from one camera to make the data distribution clearer
 71            view_location = self.dataset.match(F("location") == location_filter)
 72            logging.info(
 73                f"Data pre-processing for the Fisheye8K dataset. Data from location {location_filter} is used, with {rare_classes} as the rare classes."
 74            )
 75
 76            # Build training and validation datasets
 77            view_train = select_by_class(view_location, classes_out=rare_classes)
 78            view_val = select_by_class(view_location, classes_in=rare_classes)
 79
 80            # Data export
 81            export_dir = os.path.join(self.export_root, dataset_name_ano_dec)
 82
 83            classes = self.dataset.distinct("ground_truth.detections.label")
 84            dataset_splits = ["train", "val"]
 85            dataset_type = fo.types.YOLOv5Dataset
 86
 87            view_train.export(
 88                export_dir=export_dir,
 89                dataset_type=dataset_type,
 90                label_field=gt_field,
 91                split=dataset_splits[0],
 92                classes=classes,
 93            )
 94
 95            view_val.export(
 96                export_dir=export_dir,
 97                dataset_type=dataset_type,
 98                label_field=gt_field,
 99                split=dataset_splits[1],
100                classes=classes,
101            )
102
103            # Load the exported dataset
104            if dataset_name_ano_dec in fo.list_datasets():
105                dataset_ano_dec = fo.load_dataset(dataset_name_ano_dec)
106                logging.info(f"Existing dataset {dataset_name_ano_dec} was loaded.")
107            else:
108                dataset_ano_dec = fo.Dataset(dataset_name_ano_dec)
109                for split in dataset_splits:
110                    dataset_ano_dec.add_dir(
111                        dataset_dir=export_dir,
112                        dataset_type=dataset_type,
113                        split=split,
114                        tags=split,
115                    )
116                dataset_ano_dec.compute_metadata()
117
118            self.dataset_ano_dec = dataset_ano_dec
119
120            # Select samples that include a rare class
121            anomalous_view = dataset_ano_dec.match_tags("val", "test")
122            logging.info(f"Processing {len(anomalous_view)} val samples")
123
124            # Prepare data for Anomalib
125            dataset_name_ano_dec_masks = f"{dataset_name_ano_dec}_masks"
126            export_dir_masks = os.path.join(
127                self.export_root, dataset_name_ano_dec_masks
128            )
129            os.makedirs(export_dir_masks, exist_ok=True)
130
131            for sample in anomalous_view.iter_samples(progress=True):
132                img_width = sample.metadata.width
133                img_height = sample.metadata.height
134                mask = Image.new(
135                    "L", (img_width, img_height), 0
136                )  # Create a black image
137                draw = ImageDraw.Draw(mask)
138                for bbox in sample.ground_truth.detections:
139                    if bbox.label in rare_classes:
140                        # Convert V51 format to image format
141
142                        x_min_rel, y_min_rel, width_rel, height_rel = bbox.bounding_box
143                        x_min = int(x_min_rel * img_width)
144                        y_min = int(y_min_rel * img_height)
145                        x_max = int((x_min_rel + width_rel) * img_width)
146                        y_max = int((y_min_rel + height_rel) * img_height)
147
148                        # draw.rectangle([x0, y0, x1, y1], fill=255)  # [x0, y0, x1, y1]
149                        draw.rectangle(
150                            [x_min, y_min, x_max, y_max], fill=255
151                        )  # [x0, y0, x1, y1]
152
153                # Save the mask
154                filename = os.path.basename(sample.filepath).replace(".jpg", ".png")
155                mask.save(os.path.join(export_dir_masks, f"{filename}"))

Prepares Fisheye8K dataset for anomaly detection by filtering data from one camera, separating rare classes, and generating binary masks.