iitbhgc

Data module for constructing the IITBHGC eye movement dataset.

IITBHGCDataset

Bases: ETDataset

A PyTorch dataset for eye movement features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `cfg` | `Args` | The configuration object. | *required* |
| `ia_scaler` | `Union[MinMaxScaler, RobustScaler, StandardScaler, None]` | Scaler for interest area (IA) features. | *required* |
| `fixation_scaler` | `Union[MinMaxScaler, RobustScaler, StandardScaler, None]` | Scaler for fixation features. | *required* |
| `trial_features_scaler` | `Union[MinMaxScaler, RobustScaler, StandardScaler, None]` | Scaler for trial-level features. | *required* |
| `regime_name` | `SetNames` | The regime name. | *required* |
| `set_name` | `SetNames` | The set name. | *required* |
| `text_data` | `Union[TextDataSet, None]` | The text data. | `None` |
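
For orientation, here is a minimal construction sketch. It is not taken from the project: `Args`, `TextDataSet`, and `SetNames` are project-internal types, the import path for `SetNames` and the `SetNames.train` member are assumptions, and the `cfg`/`text_data` objects are left as placeholders built elsewhere in the pipeline.

```python
from sklearn.preprocessing import StandardScaler

# Hypothetical import path for the project-internal SetNames enum.
from src.configs.constants import SetNames
from src.data.datasets.iitbhgc import IITBHGCDataset

cfg = ...        # an Args configuration object built by the surrounding pipeline
text_data = ...  # a TextDataSet instance, or None to skip text features

dataset = IITBHGCDataset(
    cfg=cfg,
    ia_scaler=StandardScaler(),        # scaler for interest area (IA) features
    fixation_scaler=StandardScaler(),  # or None to leave fixation features unscaled
    trial_features_scaler=None,        # trial-level features left unscaled here
    regime_name=SetNames.train,        # hypothetical member name
    set_name=SetNames.train,
    text_data=text_data,
)
```
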
Source code in src/data/datasets/iitbhgc.py
```python
class IITBHGCDataset(ETDataset):
    """
    A PyTorch dataset for eye movement features.

    Args:
        cfg (Args): The configuration object.
        ia_scaler (MinMaxScaler | RobustScaler | StandardScaler | None):
            Scaler for interest area (IA) features.
        fixation_scaler (MinMaxScaler | RobustScaler | StandardScaler | None):
            Scaler for fixation features.
        trial_features_scaler (MinMaxScaler | RobustScaler | StandardScaler | None):
            Scaler for trial-level features.
        regime_name (SetNames): The regime name.
        set_name (SetNames): The set name.
        text_data (TextDataSet, optional): The text data. Defaults to None.
    """

    def __init__(
        self,
        cfg: Args,
        ia_scaler: MinMaxScaler | RobustScaler | StandardScaler | None,
        fixation_scaler: MinMaxScaler | RobustScaler | StandardScaler | None,
        trial_features_scaler: MinMaxScaler | RobustScaler | StandardScaler | None,
        regime_name: SetNames,
        set_name: SetNames,
        text_data: TextDataSet | None = None,
    ):
        super().__init__(
            cfg=cfg,
            set_name=set_name,
            regime_name=regime_name,
            ia_scaler=ia_scaler,
            fixation_scaler=fixation_scaler,
            trial_features_scaler=trial_features_scaler,
            text_data=text_data,
        )

    def extract_trial_level_features(self) -> dict[str, torch.Tensor]:
        """Collect, optionally scale, and stack the trial-level features for every trial."""
        trial_level_features_list = []
        trial_level_features = self.trial_level_features.copy()
        # Drop categorical IA columns if they appear in the trial-level frame;
        # only the remaining columns are scaled and stacked below.
        trial_level_features = trial_level_features.drop(
            columns=self.ia_categorical_features,
            errors='ignore',
        )

        for grouped_data_key in tqdm(
            self.ordered_key_list, desc='Trial level features'
        ):
            try:
                trial_features = trial_level_features.loc[grouped_data_key]
            except KeyError:
                # The index levels may be ordered differently from the grouped key;
                # retry with the first key element rotated to the end.
                e1, e2, e3, e4 = grouped_data_key
                trial_features = trial_level_features.loc[(e2, e3, e4, e1)]

            trial_features = ETDataset.normalize_features(
                trial_features,
                normalize=self.normalize,
                scaler=self.trial_features_scaler,
            )
            trial_level_features_list.append(trial_features)

        # Stack the per-trial feature rows into a single float32 tensor.
        return {
            'trial_level_features': torch.tensor(
                np.array(trial_level_features_list),
                dtype=torch.float32,
            )
        }
```
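
The `KeyError` fallback above deals with trial keys whose elements are ordered differently from the levels of the `trial_level_features` MultiIndex. The toy example below (level names and values are invented for the illustration, not taken from the project) reproduces that lookup-and-rotate pattern with plain pandas:

```python
import numpy as np
import pandas as pd

# A small frame indexed by a 4-part trial key, standing in for
# `trial_level_features`; the level names here are invented.
idx = pd.MultiIndex.from_tuples(
    [('s1', 'p1', 1, 'a1'), ('s1', 'p2', 1, 'a1')],
    names=['subject', 'paragraph', 'batch', 'article'],
)
frame = pd.DataFrame({'total_fix_dur': [812.0, 1043.0]}, index=idx)

# The grouped key carries the same four elements, but with what the frame
# treats as the last level ('a1') stored first.
key = ('a1', 's1', 'p1', 1)

try:
    row = frame.loc[key]
except KeyError:
    # Same recovery as IITBHGCDataset: rotate the first element to the end.
    e1, e2, e3, e4 = key
    row = frame.loc[(e2, e3, e4, e1)]

features = np.asarray(row, dtype=np.float32)  # one trial-level feature vector
```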