Skip to content

template

TemplateProcessor

Bases: DatasetProcessor

Processor for DATASET_NAME dataset

Source code in src/data/preprocessing/dataset_preprocessing/template.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class TemplateProcessor(DatasetProcessor):
    """Skeleton processor for the DATASET_NAME dataset.

    Every method here is a placeholder: the mappings and column lists are
    empty and the IA/fixation merge is only outlined. Replace them with the
    dataset's real logic.
    """

    def get_column_map(self, data_type: DataType) -> dict:
        """Return the column mapping for the DATASET_NAME dataset.

        Args:
            data_type: The table the mapping is requested for.

        Returns:
            An empty (placeholder) dict for ``DataType.IA`` and for
            ``DataType.FIXATIONS``; ``None`` for any other value.
        """
        # TODO: add docs
        if data_type == DataType.IA:
            return {}
        if data_type == DataType.FIXATIONS:
            return {}
        # NOTE(review): any other data type ends up here and yields None even
        # though the annotation promises a dict -- confirm callers cope.
        return None

    def get_columns_to_keep(self) -> list:
        """Return the columns to retain after filtering.

        Returns:
            An empty list (placeholder).
        """
        # TODO: add docs
        columns: list = []
        return columns

    def dataset_specific_processing(
        self, data_dict: dict[str, pd.DataFrame]
    ) -> dict[str, pd.DataFrame]:
        """Apply the DATASET_NAME-specific processing steps.

        For the IA and fixation tables that are present and not ``None``, a
        ``unique_trial_id`` column is added, built as
        ``<participant_id>_<unique_paragraph_id>_<practice_trial>``.

        Args:
            data_dict: Tables keyed by data type. The frames are modified
                in place and stored back under the same key.

        Returns:
            The same ``data_dict`` object that was passed in.
        """
        # TODO: add docs
        for table_key in (DataType.IA, DataType.FIXATIONS):
            # A missing key and an explicit None are both skipped.
            frame = data_dict.get(table_key)
            if frame is None:
                continue

            # add ids: unique_trial_id is the three identifiers joined by '_'
            participant = frame['participant_id'].astype(str)
            paragraph = frame['unique_paragraph_id'].astype(str)
            practice = frame['practice_trial'].astype(str)
            frame['unique_trial_id'] = participant + '_' + paragraph + '_' + practice

            # filter rows?
            # add labels of tasks?

            data_dict[table_key] = frame

        # add_ia_report_features_to_fixation_data ?
        # add_missing_features ?
        # compute_trial_level_features ?

        return data_dict

    def add_ia_report_features_to_fixation_data(
        self,
        ia_df: pd.DataFrame,
        fix_df: pd.DataFrame,
    ) -> pd.DataFrame:
        """Placeholder for adding IA-report features to the fixation table.

        Not implemented yet: the body performs no work and returns ``None``.

        Args:
            ia_df: Interest-area report table (currently unused).
            fix_df: Fixation table (currently unused).
        """
        # TODO: add docs
        # Planned outline:
        #   1. Unify IA-ID column name
        #   2. Build the list of IA features we plan to add
        #   3. Drop columns that also exist in fixation table
        #   4. Clean nuisance column
        #   5. Merge
        # NOTE(review): returns None although the annotation says
        # pd.DataFrame -- implement before wiring this into the pipeline.
        return None

add_ia_report_features_to_fixation_data(ia_df, fix_df)

TODO: add docs

# --- 1. Unify IA‑ID column name ----------------------------------------

# --- 2. Build the list of IA features we plan to add -------------------

# --- 3. Drop columns that also exist in fixation table -----------------

# --- 4. Clean nuisance column ------------------------------------------

# --- 5. Merge ----------------------------------------------------------

return

Source code in src/data/preprocessing/dataset_preprocessing/template.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def add_ia_report_features_to_fixation_data(
    self,
    ia_df: pd.DataFrame,
    fix_df: pd.DataFrame,
) -> pd.DataFrame:
    """Placeholder for adding IA-report features to the fixation table.

    Not implemented yet: the body performs no work and returns ``None``.

    Args:
        ia_df: Interest-area report table (currently unused).
        fix_df: Fixation table (currently unused).
    """
    # TODO: add docs
    # Planned outline:
    #   1. Unify IA-ID column name
    #   2. Build the list of IA features we plan to add
    #   3. Drop columns that also exist in fixation table
    #   4. Clean nuisance column
    #   5. Merge
    # NOTE(review): returns None although the annotation says pd.DataFrame --
    # implement before wiring this into the pipeline.
    return None

dataset_specific_processing(data_dict)

Dataset-specific processing steps

Source code in src/data/preprocessing/dataset_preprocessing/template.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def dataset_specific_processing(
    self, data_dict: dict[str, pd.DataFrame]
) -> dict[str, pd.DataFrame]:
    """Apply the DATASET_NAME-specific processing steps.

    For the IA and fixation tables that are present and not ``None``, a
    ``unique_trial_id`` column is added, built as
    ``<participant_id>_<unique_paragraph_id>_<practice_trial>``.

    Args:
        data_dict: Tables keyed by data type. The frames are modified in
            place and stored back under the same key.

    Returns:
        The same ``data_dict`` object that was passed in.
    """
    # TODO: add docs
    for table_key in (DataType.IA, DataType.FIXATIONS):
        # A missing key and an explicit None are both skipped.
        frame = data_dict.get(table_key)
        if frame is None:
            continue

        # add ids: unique_trial_id is the three identifiers joined by '_'
        participant = frame['participant_id'].astype(str)
        paragraph = frame['unique_paragraph_id'].astype(str)
        practice = frame['practice_trial'].astype(str)
        frame['unique_trial_id'] = participant + '_' + paragraph + '_' + practice

        # filter rows?
        # add labels of tasks?

        data_dict[table_key] = frame

    # add_ia_report_features_to_fixation_data ?
    # add_missing_features ?
    # compute_trial_level_features ?

    return data_dict

get_column_map(data_type)

Get column mapping for DATASET_NAME dataset

Source code in src/data/preprocessing/dataset_preprocessing/template.py
12
13
14
15
16
17
18
def get_column_map(self, data_type: DataType) -> dict:
    """Return the column mapping for the DATASET_NAME dataset.

    Args:
        data_type: The table the mapping is requested for.

    Returns:
        An empty (placeholder) dict for ``DataType.IA`` and for
        ``DataType.FIXATIONS``; ``None`` for any other value.
    """
    # TODO: add docs
    if data_type == DataType.IA:
        return {}
    if data_type == DataType.FIXATIONS:
        return {}
    # NOTE(review): any other data type ends up here and yields None even
    # though the annotation promises a dict -- confirm callers cope.
    return None

get_columns_to_keep()

Get list of columns to keep after filtering

Source code in src/data/preprocessing/dataset_preprocessing/template.py
20
21
22
23
def get_columns_to_keep(self) -> list:
    """Return the columns to retain after filtering.

    Returns:
        An empty list (placeholder); a new list object on every call.
    """
    # TODO: add docs
    columns: list = []
    return columns