A tuple containing:
- data_train (pd.DataFrame): The training set.
- data_val (pd.DataFrame): The validation set.
- data_test (pd.DataFrame): The test set.
Examples
# Example: split an hourly series of 100 rows into train/validation/test
# parts, then check the sizes, index coverage, and chronological order.
import numpy as np
import pandas as pd

from spotforecast2_safe.splitter.split import split_rel_train_val_test

# Seeded generator so the synthetic data is reproducible.
rng = np.random.default_rng(0)
idx = pd.date_range("2020-01-01", periods=100, freq="h")
data = pd.DataFrame({"value": rng.standard_normal(100)}, index=idx)

data_train, data_val, data_test = split_rel_train_val_test(
    data,
    perc_train=0.8,
    perc_val=0.1,
    verbose=False,
)

# Sizes
assert len(data_train) == 80
assert len(data_val) == 10
assert len(data_test) == 10
print(f"Train: {len(data_train)}, Val: {len(data_val)}, Test: {len(data_test)}")

# Full index coverage: union of the three splits equals the original index
combined_index = data_train.index.append(data_val.index).append(data_test.index)
assert combined_index.equals(data.index), "Union of splits must equal original index"

# Temporal ordering: train ends before val, val ends before test
assert data_train.index.max() < data_val.index.min()
assert data_val.index.max() < data_test.index.min()
print(f"Train ends: {data_train.index.max()}")
print(f"Val starts: {data_val.index.min()}, ends: {data_val.index.max()}")
print(f"Test starts: {data_test.index.min()}")