Source code for elasticai.preprocessor._check_funcs

from dataclasses import dataclass


def check_key_elements(key: str, elements: list[str]) -> bool:
    """Function for checking if the key matches one of the elements (logical OR)
    :param key:         Key to check
    :param elements:    List of elements the key is compared against (exact equality, no substring search)
    :return:            True if the key is equal to at least one element, False otherwise (also for an empty list)
    """
    # Exact-match membership test; the former docstring described a logical AND that the code never performed
    return key in elements
def check_string_equal_elements_all(text: str, elements: list[str]) -> bool:
    """Function for checking if every element occurs in the text string (logical AND)
    :param text:        String with a text
    :param elements:    List of substrings which are searched in the text
    :return:            True if all elements are present in text (also True for an empty list)
    """
    # De Morgan form: succeed unless at least one element is missing
    return not any(needle not in text for needle in elements)
def check_string_equal_elements_any(text: str, elements: list[str]) -> bool:
    """Function for checking if at least one element occurs in the text string (logical OR)
    :param text:        String with a text
    :param elements:    List of substrings which are searched in the text
    :return:            True if any element is present in text (False for an empty list)
    """
    for needle in elements:
        if needle in text:
            # First hit is enough, remaining elements are not inspected
            return True
    return False
def check_keylist_elements_all(keylist: list[str], elements: list[str]) -> bool:
    """Function for checking if every element is contained in the key list (logical AND)
    :param keylist:     List with keys to check
    :param elements:    List with elements which are searched in the key list
    :return:            True if all elements are present in the key list or if the key list is empty
    """
    # An empty key list is always accepted
    if len(keylist) == 0:
        return True
    for wanted in elements:
        if wanted not in keylist:
            return False
    return True
def check_keylist_elements_any(keylist: list[str], elements: list[str]) -> bool:
    """Function for checking if at least one element is contained in the key list (logical OR)
    :param keylist:     List with keys to check
    :param elements:    List with elements which are searched in the key list
    :return:            True if any element is present in the key list or if the key list is empty
    """
    # An empty key list is always accepted
    if len(keylist) == 0:
        return True
    for wanted in elements:
        if wanted in keylist:
            return True
    return False
def check_elem_unique(elements: list) -> bool:
    """Function for checking if all elements are unique
    :param elements:    List of hashable elements to check. If the first entry is a list, the input is treated
                        as a list of lists and flattened by one level before checking
    :return:            True if all elements are unique (an empty list is unique by definition)
    """
    from itertools import chain

    # Nothing to compare - without this guard the look-up of elements[0] raises an IndexError
    if len(elements) == 0:
        return True

    # The first entry decides whether the input is nested and has to be flattened
    if isinstance(elements[0], list):
        flat = list(chain.from_iterable(elements))
    else:
        flat = list(elements)
    # Duplicates collapse inside a set, so the sizes only match if every value occurs once
    return len(set(flat)) == len(flat)
[docs] def check_value_range(value: float | int, range: list[float | int]) -> bool: """Function for checking if value is within range :param value: Value to check (float or integer) :param range: List with two values to indicate the range :return: Boolean if value is in range """ assert len(range) == 2, "Array should have 2 elements [min, max]" return range[0] <= value <= range[1]
def is_close(value: float, target: float, tolerance: float = 0.05) -> bool:
    """Function for checking if a float value lies near the target value
    :param value:       Float value to check
    :param target:      Target value
    :param tolerance:   Allowed absolute deviation around the target value (must be positive)
    :return:            True if the absolute difference between value and target does not exceed the tolerance
    """
    assert tolerance > 0
    deviation = abs(value - target)
    return deviation <= abs(tolerance)
@dataclass
class MetricTimestamps:
    """Result container for the comparison of predicted timestamps with true timestamps
    Attributes:
        f1_score:   Float with the F1-Score of the comparison
        TP:         Integer with the number of true positives
        FP:         Integer with the number of false positives
        FN:         Integer with the number of false negatives
    """
    # Field order defines the positional constructor signature and must not be changed
    f1_score: float
    TP: int
    FP: int
    FN: int
def compare_timestamps(true_labels: list, pred_labels: list, window: int = 2) -> MetricTimestamps:
    """Compare the timestamps of predicted events with the timestamps of true events.

    Every distinct true timestamp ``t`` is searched for in ``pred_labels`` within the closed interval
    ``[t - window, t + window]``. Each predicted timestamp found inside that interval counts as one
    true positive (so one true timestamp can contribute several true positives). A true timestamp
    without any prediction inside its interval counts as one false negative. False positives are
    estimated from the list lengths only: ``max(len(pred_labels) - len(true_labels), 0)``.

    Only true timestamps between 0 and the larger of the two last list entries are evaluated, so
    both lists are expected to hold non-negative integers in ascending order.

    Args:
        true_labels:    List with integer timestamps of the true events
        pred_labels:    List with integer timestamps of the predicted events
        window:         Half width of the acceptance window around each true timestamp
    Returns:
        Class MetricTimestamps with TP, FP, FN and the F1-Score ``2*TP / (2*TP + FP + FN)``.
        The F1-Score is 0.0 if nothing was counted at all (e.g. both lists are empty)
    """
    half_width = int(window)
    # Set look-ups replace the repeated linear scans of the prediction list
    pred_lookup = set(pred_labels)

    # Upper search bound is taken from the last entries; empty lists are skipped instead of raising IndexError
    last_stamps = [labels[-1] for labels in (true_labels, pred_labels) if len(labels)]
    upper = max(last_stamps) if last_stamps else -1

    true_positive = 0
    false_negative = 0
    for stamp in set(true_labels):
        # Timestamps outside [0, upper] were never visited by the search and stay ignored
        if not 0 <= stamp <= upper:
            continue
        hits = sum(
            1 for candidate in range(stamp - half_width, stamp + half_width + 1)
            if candidate in pred_lookup
        )
        if hits:
            true_positive += hits
        else:
            false_negative += 1

    false_positive = max(len(pred_labels) - len(true_labels), 0)

    # F1 is the harmonic mean of precision and recall; guard against 0/0 when no event was counted
    denominator = 2 * true_positive + false_positive + false_negative
    f1_score = 2 * true_positive / denominator if denominator else 0.0
    return MetricTimestamps(f1_score=f1_score, FN=false_negative, FP=false_positive, TP=true_positive)