Skip to content

manydataset

ManyToManyDataset

Bases: Dataset

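A PyTorch Dataset for many-to-many data: each DataFrame in the list is one sample, whose features are all rows of the non-target columns and whose target is the entire target column (one target value per row).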

Parameters:

Name Type Description Default
df_list List[DataFrame]

List of pandas DataFrames.

required
target str

The target column name.

required
drop Optional[Union[str, List[str]]]

Column(s) to drop from the DataFrames. Default is None.

None
dtype dtype

Data type for the tensors. Default is torch.float32.

float32

Attributes:

Name Type Description
data List[DataFrame]

List of pandas DataFrames with specified columns dropped.

target List[Tensor]

List of target tensors.

features List[Tensor]

List of feature tensors.

Examples:

>>> import pandas as pd
>>> from spotpython.data.manydataset import ManyToManyDataset
>>> df1 = pd.DataFrame({'feature1': [1, 2], 'feature2': [3, 4], 'target': [5, 6]})
>>> df2 = pd.DataFrame({'feature1': [7, 8], 'feature2': [9, 10], 'target': [11, 12]})
>>> dataset = ManyToManyDataset([df1, df2], target='target', drop='feature2')
>>> len(dataset)
2
>>> dataset[0]
(tensor([[1.],
         [2.]]), tensor([5., 6.]))
Source code in spotpython/data/manydataset.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class ManyToManyDataset(Dataset):
    """
    A PyTorch Dataset for many-to-many data.

    Every DataFrame in ``df_list`` is treated as one sample. Its features are
    all rows of the remaining (non-target, non-dropped) columns and its target
    is the complete target column, i.e. one target value per row.

    Args:
        df_list (List[pd.DataFrame]): List of pandas DataFrames.
        target (str): The target column name.
        drop (Optional[Union[str, List[str]]]): Column(s) to drop from the DataFrames.
            Columns that are not present in a DataFrame are ignored.
            Default is None, in which case no column is dropped.
        dtype (torch.dtype): Data type for the tensors. Default is torch.float32.

    Attributes:
        data (List[pd.DataFrame]): List of pandas DataFrames with specified columns dropped.
        target (List[torch.Tensor]): List of target tensors.
        features (List[torch.Tensor]): List of feature tensors.

    Raises:
        KeyError: If ``target`` is not a column of one of the DataFrames
            (after the ``drop`` columns have been removed).

    Examples:
        >>> import pandas as pd
        >>> from spotpython.data.manydataset import ManyToManyDataset
        >>> df1 = pd.DataFrame({'feature1': [1, 2], 'feature2': [3, 4], 'target': [5, 6]})
        >>> df2 = pd.DataFrame({'feature1': [7, 8], 'feature2': [9, 10], 'target': [11, 12]})
        >>> dataset = ManyToManyDataset([df1, df2], target='target', drop='feature2')
        >>> len(dataset)
        2
        >>> dataset[0]
        (tensor([[1.],
                 [2.]]), tensor([5., 6.]))
    """

    def __init__(
        self,
        df_list: List[pd.DataFrame],
        target: str,
        drop: Optional[Union[str, List[str]]] = None,
        dtype: torch.dtype = torch.float32,
    ):
        if drop is None:
            # DataFrame.drop(None, axis=1) raises ValueError (not KeyError), so the
            # documented default must bypass the drop call altogether.
            self.data = list(df_list)
        else:
            # errors="ignore" drops every requested column that exists, instead of
            # abandoning the whole drop as soon as one label is missing somewhere.
            self.data = [df.drop(columns=drop, errors="ignore") for df in df_list]
        # One target tensor per DataFrame, holding the full target column.
        self.target = [torch.tensor(df[target].to_numpy(), dtype=dtype) for df in self.data]
        # One 2-D feature tensor per DataFrame, built from all non-target columns.
        self.features = [torch.tensor(df.drop(columns=[target]).to_numpy(), dtype=dtype) for df in self.data]

    def __getitem__(self, index: int):
        """Return the ``(features, target)`` tensor pair of the DataFrame at ``index``."""
        x = self.features[index]
        y = self.target[index]
        return x, y

    def __len__(self) -> int:
        """Return the number of DataFrames (samples) in the dataset."""
        return len(self.data)

ManyToOneDataset

Bases: Dataset

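A PyTorch Dataset for many-to-one data: each DataFrame in the list is one sample, whose features are all rows of the non-target columns and whose target is a single scalar taken from the first row of the target column.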

Parameters:

Name Type Description Default
df_list List[DataFrame]

List of pandas DataFrames.

required
target str

The target column name.

required
drop Optional[Union[str, List[str]]]

Column(s) to drop from the DataFrames. Default is None.

None
dtype dtype

Data type for the tensors. Default is torch.float32.

float32

Attributes:

Name Type Description
data List[DataFrame]

List of pandas DataFrames with specified columns dropped.

target List[Tensor]

List of scalar target tensors, one per DataFrame (the first value of the target column).

features List[Tensor]

List of feature tensors.

Examples:

>>> import pandas as pd
>>> from spotpython.data.manydataset import ManyToOneDataset
>>> df1 = pd.DataFrame({'feature1': [1, 2], 'feature2': [3, 4], 'target': [5, 6]})
>>> df2 = pd.DataFrame({'feature1': [7, 8], 'feature2': [9, 10], 'target': [11, 12]})
>>> dataset = ManyToOneDataset([df1, df2], target='target', drop='feature2')
>>> len(dataset)
2
>>> dataset[0]
(tensor([[1.],
         [2.]]), tensor(5.))
Source code in spotpython/data/manydataset.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
class ManyToOneDataset(Dataset):
    """
    A PyTorch Dataset for many-to-one data.

    Every DataFrame in ``df_list`` is treated as one sample. Its features are
    all rows of the remaining (non-target, non-dropped) columns and its target
    is a single scalar: the value in the first row of the target column.

    Args:
        df_list (List[pd.DataFrame]): List of pandas DataFrames.
        target (str): The target column name.
        drop (Optional[Union[str, List[str]]]): Column(s) to drop from the DataFrames.
            Columns that are not present in a DataFrame are ignored.
            Default is None, in which case no column is dropped.
        dtype (torch.dtype): Data type for the tensors. Default is torch.float32.

    Attributes:
        data (List[pd.DataFrame]): List of pandas DataFrames with specified columns dropped.
        target (List[torch.Tensor]): List of scalar target tensors (first target value of each DataFrame).
        features (List[torch.Tensor]): List of feature tensors.

    Raises:
        KeyError: If ``target`` is not a column of one of the DataFrames
            (after the ``drop`` columns have been removed).
        IndexError: If one of the DataFrames has no rows, so there is no
            first target value to take.

    Examples:
        >>> import pandas as pd
        >>> from spotpython.data.manydataset import ManyToOneDataset
        >>> df1 = pd.DataFrame({'feature1': [1, 2], 'feature2': [3, 4], 'target': [5, 6]})
        >>> df2 = pd.DataFrame({'feature1': [7, 8], 'feature2': [9, 10], 'target': [11, 12]})
        >>> dataset = ManyToOneDataset([df1, df2], target='target', drop='feature2')
        >>> len(dataset)
        2
        >>> dataset[0]
        (tensor([[1.],
                 [2.]]), tensor(5.))
    """

    def __init__(
        self,
        df_list: List[pd.DataFrame],
        target: str,
        drop: Optional[Union[str, List[str]]] = None,
        dtype: torch.dtype = torch.float32,
    ):
        if drop is None:
            # DataFrame.drop(None, axis=1) raises ValueError (not KeyError), so the
            # documented default must bypass the drop call altogether.
            self.data = list(df_list)
        else:
            # errors="ignore" drops every requested column that exists, instead of
            # abandoning the whole drop as soon as one label is missing somewhere.
            self.data = [df.drop(columns=drop, errors="ignore") for df in df_list]
        # Many-to-one: only the first row's target value represents the whole DataFrame.
        self.target = [torch.tensor(df[target].to_numpy()[0], dtype=dtype) for df in self.data]
        # One 2-D feature tensor per DataFrame, built from all non-target columns.
        self.features = [torch.tensor(df.drop(columns=[target]).to_numpy(), dtype=dtype) for df in self.data]

    def __getitem__(self, index: int):
        """Return the ``(features, target)`` tensor pair of the DataFrame at ``index``."""
        x = self.features[index]
        y = self.target[index]
        return x, y

    def __len__(self) -> int:
        """Return the number of DataFrames (samples) in the dataset."""
        return len(self.data)