Visualize outliers in a DataFrame using stacked histograms.
Creates a histogram for each specified column, displaying both regular data and detected outliers in different colors. Uses IsolationForest for outlier detection.
Visualize outliers in time series using Plotly scatter plots.
Creates an interactive time series plot for each specified column, showing regular data as a line and detected outliers as scatter points. Uses IsolationForest for outlier detection.
# Example: plot planted outliers in a synthetic hourly "temperature" series.
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from spotforecast2_safe.preprocessing.outlier import get_outliers
from spotforecast2.plots.outlier_plots import visualize_outliers_plotly_scatter

# Fixed seed so the synthetic series is reproducible between runs.
rng = np.random.default_rng(0)
dates = pd.date_range("2024-01-01", periods=30, freq="h")

# 28 samples around 20.0 followed by two values far outside that range.
normal_vals = rng.normal(loc=20.0, scale=2.0, size=28)
outlier_vals_arr = [60.0, 65.0]  # two obvious outliers

data_original = pd.DataFrame(
    {"temperature": np.concatenate([normal_vals, outlier_vals_arr])},
    index=dates,
)
data_cleaned = data_original.copy()

# Verify that get_outliers detects the planted outliers before plotting.
# contamination=0.07 is close to the planted share (2 of 30 samples).
detected = get_outliers(
    data_original,
    data_original=data_original,
    contamination=0.07,
)
assert len(detected["temperature"]) >= 1, "Expected at least one outlier"

# Renders an interactive Plotly time series with outliers marked in red
visualize_outliers_plotly_scatter(
    data_cleaned,
    data_original,
    columns=["temperature"],
    contamination=0.07,
)
print(f"Detected {len(detected['temperature'])} outlier(s) in 'temperature'")