File size: 4,212 Bytes
1f53a4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pandas as pd
import torch
from typing import List, Dict


class Likelihood:
    """
    Class for making likelihood.

    The column layout (base columns and prediction columns) is decided once
    at construction time from the task and the number of outputs per label,
    then reused for every batch passed to make_format().
    """
    def __init__(self, task: str, num_outputs_for_label: Dict[str, int]) -> None:
        """
        Args:
            task (str): task, one of 'classification', 'regression' or 'deepsurv'
            num_outputs_for_label (Dict[str, int]): number of classes for each label

        Raises:
            ValueError: if task is not one of the supported tasks
        """
        self.task = task
        self.num_outputs_for_label = num_outputs_for_label
        self.base_column_list = self._set_base_columns(self.task)
        self.pred_column_list = self._make_pred_columns(self.task, self.num_outputs_for_label)

    def _set_base_columns(self, task: str) -> List[str]:
        """
        Return base columns.

        Args:
            task (str): task

        Returns:
            List[str]: base columns except columns of label and prediction

        Raises:
            ValueError: if task is not 'classification', 'regression' or 'deepsurv'
        """
        if task in ('classification', 'regression'):
            return ['uniqID', 'group', 'imgpath', 'split']
        if task == 'deepsurv':
            # deepsurv additionally carries the 'periods' column
            return ['uniqID', 'group', 'imgpath', 'split', 'periods']
        raise ValueError(f"Invalid task: {task}.")

    def _make_pred_columns(self, task: str, num_outputs_for_label: Dict[str, int]) -> Dict[str, List[str]]:
        """
        Make column names of predictions with label name and its number of classes.

        Args:
            task (str):  task
            num_outputs_for_label (Dict[str, int]): number of classes for each label

        Returns:
            Dict[str, List[str]]: label and list of columns of predictions with its class number

        Raises:
            ValueError: if task is not 'classification', 'regression' or 'deepsurv'

        eg.
        classification:
        {label_A: 2, label_B: 2} -> {label_A: [pred_label_A_0, pred_label_A_1], label_B: [pred_label_B_0, pred_label_B_1]}
        regression or deepsurv:
        {label_A: 1, label_B: 1} -> {label_A: [pred_label_A], label_B: [pred_label_B]}
        """
        if task == 'classification':
            # One prediction column per class index
            return {
                label_name: [f'pred_{label_name}_{i}' for i in range(num_classes)]
                for label_name, num_classes in num_outputs_for_label.items()
            }
        if task in ('regression', 'deepsurv'):
            # A single prediction column per label; the number of outputs is not used here
            return {label_name: [f'pred_{label_name}'] for label_name in num_outputs_for_label}
        raise ValueError(f"Invalid task: {task}.")

    def make_format(self, data: Dict, output: Dict[str, torch.Tensor]) -> pd.DataFrame:
        """
        Make a new DataFrame of likelihood every batch.

        Args:
            data (Dict): batch data from dataloader; every base column and
                         the key 'labels' are read from it
            output (Dict[str, torch.Tensor]): output of model, keyed by label name

        Returns:
            pd.DataFrame: base columns followed, for each label in output,
                          by the label column (only when labels are given)
                          and the prediction columns of that label
        """
        base = {column_name: data[column_name] for column_name in self.base_column_list}
        frames = [pd.DataFrame(base)]

        # Iterating data['labels'] yields its keys, so this is True when at least
        # one truthy label name is present.
        # NOTE(review): presumably 'labels' is an empty dict when the batch has
        # no ground truth -- confirm against the dataloader.
        has_labels = any(data['labels'])

        for label_name, pred in output.items():
            if has_labels:
                frames.append(pd.DataFrame({label_name: data['labels'][label_name].tolist()}))
            pred = pred.to('cpu').detach().numpy().copy()
            frames.append(pd.DataFrame(pred, columns=self.pred_column_list[label_name]))

        # Concatenate once instead of re-copying the growing frame for every label
        return pd.concat(frames, axis=1)


def set_likelihood(task: str, num_outputs_for_label: Dict[str, int]) -> Likelihood:
    """
    Create a Likelihood for the given task and label configuration.

    Args:
        task (str): task
        num_outputs_for_label (Dict[str, int]): number of classes for each label

    Returns:
        Likelihood: instance of class Likelihood
    """
    likelihood = Likelihood(task=task, num_outputs_for_label=num_outputs_for_label)
    return likelihood