mertkarabacak commited on
Commit
ad0e25f
0 Parent(s):

Duplicate from MSHS-Neurosurgery-Research/NCDB-LGG

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.npy filter=lfs diff=lfs merge=lfs -text
14
+ *.npz filter=lfs diff=lfs merge=lfs -text
15
+ *.onnx filter=lfs diff=lfs merge=lfs -text
16
+ *.ot filter=lfs diff=lfs merge=lfs -text
17
+ *.parquet filter=lfs diff=lfs merge=lfs -text
18
+ *.pb filter=lfs diff=lfs merge=lfs -text
19
+ *.pickle filter=lfs diff=lfs merge=lfs -text
20
+ *.pkl filter=lfs diff=lfs merge=lfs -text
21
+ *.pt filter=lfs diff=lfs merge=lfs -text
22
+ *.pth filter=lfs diff=lfs merge=lfs -text
23
+ *.rar filter=lfs diff=lfs merge=lfs -text
24
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
25
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
26
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
27
+ *.tflite filter=lfs diff=lfs merge=lfs -text
28
+ *.tgz filter=lfs diff=lfs merge=lfs -text
29
+ *.wasm filter=lfs diff=lfs merge=lfs -text
30
+ *.xz filter=lfs diff=lfs merge=lfs -text
31
+ *.zip filter=lfs diff=lfs merge=lfs -text
32
+ *.zst filter=lfs diff=lfs merge=lfs -text
33
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
34
+ acdf_final.csv filter=lfs diff=lfs merge=lfs -text
35
+ acdf_imputed.csv filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: NCDB-LGG
3
+ emoji: 🧠
4
+ colorFrom: blue
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.27.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: MSHS-Neurosurgery-Research/NCDB-LGG
11
+ ---
app.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ HF_TOKEN = os.getenv("HF_TOKEN")
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import sklearn
7
+ import sklearn.metrics
8
+ from math import sqrt
9
+ from scipy import stats as st
10
+ from matplotlib import pyplot as plt
11
+
12
+ from sklearn.linear_model import LogisticRegression
13
+
14
+ import shap
15
+ import gradio as gr
16
+ import random
17
+ import re
18
+ import textwrap
19
+ from datasets import load_dataset
20
+
21
+
22
+ #Read training data.
23
+
24
+ x1 = pd.read_csv("m12_data_train.csv", index_col = 0, low_memory = False)
25
+
26
+ x2 = pd.read_csv("m24_data_train.csv", index_col = 0, low_memory = False)
27
+
28
+ x3 = pd.read_csv("m36_data_train.csv", index_col = 0, low_memory = False)
29
+
30
+ x4 = pd.read_csv("m60_data_train.csv", index_col = 0, low_memory = False)
31
+
32
+
33
+ #Read validation data.
34
+
35
+ x1_valid = pd.read_csv("m12_data_valid.csv", index_col = 0, low_memory = False)
36
+
37
+ x2_valid = pd.read_csv("m24_data_valid.csv", index_col = 0, low_memory = False)
38
+
39
+ x3_valid = pd.read_csv("m36_data_valid.csv", index_col = 0, low_memory = False)
40
+
41
+ x4_valid = pd.read_csv("m60_data_valid.csv", index_col = 0, low_memory = False)
42
+
43
+
44
+ #Define feature names.
45
+ f1_names = list(x1.columns)
46
+ f1_names = [f1.replace('__', ' - ') for f1 in f1_names]
47
+ f1_names = [f1.replace('_', ' ') for f1 in f1_names]
48
+
49
+ f2_names = list(x2.columns)
50
+ f2_names = [f2.replace('__', ' - ') for f2 in f2_names]
51
+ f2_names = [f2.replace('_', ' ') for f2 in f2_names]
52
+
53
+ f3_names = list(x3.columns)
54
+ f3_names = [f3.replace('__', ' - ') for f3 in f3_names]
55
+ f3_names = [f3.replace('_', ' ') for f3 in f3_names]
56
+
57
+ f4_names = list(x4.columns)
58
+ f4_names = [f4.replace('__', ' - ') for f4 in f4_names]
59
+ f4_names = [f4.replace('_', ' ') for f4 in f4_names]
60
+
61
+
62
+ #Prepare training data for the outcome 1 (12-month mortality).
63
+ y1 = x1.pop('OUTCOME')
64
+
65
+ #Prepare validation data for the outcome 1 (12-month mortality).
66
+ y1_valid = x1_valid.pop('OUTCOME')
67
+
68
+ #Prepare training data for the outcome 2 (24-month mortality).
69
+ y2 = x2.pop('OUTCOME')
70
+
71
+ #Prepare validation data for the outcome 2 (24-month mortality).
72
+ y2_valid = x2_valid.pop('OUTCOME')
73
+
74
+ #Prepare training data for the outcome 3 (36-month mortality).
75
+ y3 = x3.pop('OUTCOME')
76
+
77
+ #Prepare validation data for the outcome 3 (36-month mortality).
78
+ y3_valid = x3_valid.pop('OUTCOME')
79
+
80
+ #Prepare training data for the outcome 4 (60-month mortality).
81
+ y4 = x4.pop('OUTCOME')
82
+
83
+ #Prepare validation data for the outcome 4 (60-month mortality).
84
+ y4_valid = x4_valid.pop('OUTCOME')
85
+
86
+
87
+ #Assign hyperparameters.
88
+
89
+ y1_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 9.636570621064845e-06, 'lambda_l2': 0.003747385566704826, 'num_leaves': 8, 'feature_fraction': 0.5227250594804498, 'bagging_fraction': 0.44654591882113764, 'bagging_freq': 4, 'min_child_samples': 72, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
90
+ y2_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 8.065104217173182, 'lambda_l2': 0.009368546720767723, 'num_leaves': 15, 'feature_fraction': 0.4640347984930871, 'bagging_fraction': 0.4385618748071607, 'bagging_freq': 3, 'min_child_samples': 66, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
91
+ y3_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 1.2748078853140657e-05, 'lambda_l2': 0.6573192301848148, 'num_leaves': 39, 'feature_fraction': 0.7143554798223156, 'bagging_fraction': 0.5791644571313095, 'bagging_freq': 7, 'min_child_samples': 50, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
92
+ y4_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 7.088664776651091e-06, 'lambda_l2': 4.1646943899743127e-08, 'num_leaves': 36, 'feature_fraction': 0.436937199531357, 'bagging_fraction': 0.8226045360995117, 'bagging_freq': 4, 'min_child_samples': 68, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
93
+
94
+ #Training models.
95
+
96
+ from lightgbm import LGBMClassifier
97
+ lgb = LGBMClassifier(**y1_params)
98
+ y1_model = lgb
99
+
100
+ y1_model = y1_model.fit(x1, y1)
101
+ y1_explainer = shap.Explainer(y1_model.predict, x1)
102
+ y1_calib_probs = y1_model.predict_proba(x1_valid)
103
+ y1_calib_model = LogisticRegression()
104
+ y1_calib_model = y1_calib_model.fit(y1_calib_probs, y1_valid)
105
+
106
+
107
+ from lightgbm import LGBMClassifier
108
+ lgb = LGBMClassifier(**y2_params)
109
+ y2_model = lgb
110
+
111
+ y2_model = y2_model.fit(x2, y2)
112
+ y2_explainer = shap.Explainer(y2_model.predict, x2)
113
+ y2_calib_probs = y2_model.predict_proba(x2_valid)
114
+ y2_calib_model = LogisticRegression()
115
+ y2_calib_model = y2_calib_model.fit(y2_calib_probs, y2_valid)
116
+
117
+
118
+ from lightgbm import LGBMClassifier
119
+ lgb = LGBMClassifier(**y3_params)
120
+ y3_model = lgb
121
+
122
+ y3_model = y3_model.fit(x3, y3)
123
+ y3_explainer = shap.Explainer(y3_model.predict, x3)
124
+ y3_calib_probs = y3_model.predict_proba(x3_valid)
125
+ y3_calib_model = LogisticRegression()
126
+ y3_calib_model = y3_calib_model.fit(y3_calib_probs, y3_valid)
127
+
128
+
129
+ from lightgbm import LGBMClassifier
130
+ lgb = LGBMClassifier(**y4_params)
131
+ y4_model = lgb
132
+
133
+ y4_model = y4_model.fit(x4, y4)
134
+ y4_explainer = shap.Explainer(y4_model.predict, x4)
135
+ y4_calib_probs = y4_model.predict_proba(x4_valid)
136
+ y4_calib_model = LogisticRegression()
137
+ y4_calib_model = y4_calib_model.fit(y4_calib_probs, y4_valid)
138
+
139
+
140
+ output_y1 = (
141
+ """
142
+ <br/>
143
+ <center>The probability of 12-month survival:</center>
144
+ <br/>
145
+ <center><h1>{:.2f}%</h1></center>
146
+ """
147
+ )
148
+
149
+ output_y2 = (
150
+ """
151
+ <br/>
152
+ <center>The probability of 24-month survival:</center>
153
+ <br/>
154
+ <center><h1>{:.2f}%</h1></center>
155
+ """
156
+ )
157
+
158
+ output_y3 = (
159
+ """
160
+ <br/>
161
+ <center>The probability of 36-month survival:</center>
162
+ <br/>
163
+ <center><h1>{:.2f}%</h1></center>
164
+ """
165
+ )
166
+
167
+ output_y4 = (
168
+ """
169
+ <br/>
170
+ <center>The probability of 60-month survival:</center>
171
+ <br/>
172
+ <center><h1>{:.2f}%</h1></center>
173
+ """
174
+ )
175
+
176
+
177
+ #Define predict for y1.
178
+ def y1_predict(*args):
179
+ df1 = pd.DataFrame([args], columns=x1.columns)
180
+ pos_pred = y1_model.predict_proba(df1)
181
+ pos_pred = y1_calib_model.predict_proba(pos_pred)
182
+ prob = pos_pred[0][1]
183
+ prob = 1-prob
184
+ output = output_y1.format(prob * 100)
185
+ return output
186
+
187
+ #Define predict for y2.
188
+ def y2_predict(*args):
189
+ df2 = pd.DataFrame([args], columns=x2.columns)
190
+ pos_pred = y2_model.predict_proba(df2)
191
+ pos_pred = y2_calib_model.predict_proba(pos_pred)
192
+ prob = pos_pred[0][1]
193
+ prob = 1-prob
194
+ output = output_y2.format(prob * 100)
195
+ return output
196
+
197
+ #Define predict for y3.
198
+ def y3_predict(*args):
199
+ df3 = pd.DataFrame([args], columns=x3.columns)
200
+ pos_pred = y3_model.predict_proba(df3)
201
+ pos_pred = y3_calib_model.predict_proba(pos_pred)
202
+ prob = pos_pred[0][1]
203
+ prob = 1-prob
204
+ output = output_y3.format(prob * 100)
205
+ return output
206
+
207
+ #Define predict for y4.
208
+ def y4_predict(*args):
209
+ df4 = pd.DataFrame([args], columns=x4.columns)
210
+ pos_pred = y4_model.predict_proba(df4)
211
+ pos_pred = y4_calib_model.predict_proba(pos_pred)
212
+ prob = pos_pred[0][1]
213
+ prob = 1-prob
214
+ output = output_y4.format(prob * 100)
215
+ return output
216
+
217
+
218
+ #Define function for wrapping feature labels.
219
+ def wrap_labels(ax, width, break_long_words=False):
220
+ labels = []
221
+ for label in ax.get_yticklabels():
222
+ text = label.get_text()
223
+ labels.append(textwrap.fill(text, width=width, break_long_words=break_long_words))
224
+ ax.set_yticklabels(labels, rotation=0)
225
+
226
+
227
+ #Define interpret for y1.
228
+ def y1_interpret(*args):
229
+ df1 = pd.DataFrame([args], columns=x1.columns)
230
+ shap_values1 = y1_explainer(df1).values
231
+ shap_values1 = np.abs(shap_values1)
232
+ shap.bar_plot(shap_values1[0], max_display = 10, show = False, feature_names = f1_names)
233
+ fig = plt.gcf()
234
+ ax = plt.gca()
235
+ wrap_labels(ax, 20)
236
+ ax.figure
237
+ plt.tight_layout()
238
+ fig.set_figheight(7)
239
+ fig.set_figwidth(9)
240
+ plt.xlabel("SHAP value (impact on model output)", fontsize =12, fontweight = 'heavy', labelpad = 8)
241
+ plt.tick_params(axis="y",direction="out", labelsize = 12)
242
+ plt.tick_params(axis="x",direction="out", labelsize = 12)
243
+ return fig
244
+
245
+ #Define interpret for y2.
246
+ def y2_interpret(*args):
247
+ df2 = pd.DataFrame([args], columns=x2.columns)
248
+ shap_values2 = y2_explainer(df2).values
249
+ shap_values2 = np.abs(shap_values2)
250
+ shap.bar_plot(shap_values2[0], max_display = 10, show = False, feature_names = f2_names)
251
+ fig = plt.gcf()
252
+ ax = plt.gca()
253
+ wrap_labels(ax, 20)
254
+ ax.figure
255
+ plt.tight_layout()
256
+ fig.set_figheight(7)
257
+ fig.set_figwidth(9)
258
+ plt.xlabel("SHAP value (impact on model output)", fontsize =12, fontweight = 'heavy', labelpad = 8)
259
+ plt.tick_params(axis="y",direction="out", labelsize = 12)
260
+ plt.tick_params(axis="x",direction="out", labelsize = 12)
261
+ return fig
262
+
263
+ #Define interpret for y3.
264
+ def y3_interpret(*args):
265
+ df3 = pd.DataFrame([args], columns=x3.columns)
266
+ shap_values3 = y3_explainer(df3).values
267
+ shap_values3 = np.abs(shap_values3)
268
+ shap.bar_plot(shap_values3[0], max_display = 10, show = False, feature_names = f3_names)
269
+ fig = plt.gcf()
270
+ ax = plt.gca()
271
+ wrap_labels(ax, 20)
272
+ ax.figure
273
+ plt.tight_layout()
274
+ fig.set_figheight(7)
275
+ fig.set_figwidth(9)
276
+ plt.xlabel("SHAP value (impact on model output)", fontsize =12, fontweight = 'heavy', labelpad = 8)
277
+ plt.tick_params(axis="y",direction="out", labelsize = 12)
278
+ plt.tick_params(axis="x",direction="out", labelsize = 12)
279
+ return fig
280
+
281
+ #Define interpret for y4.
282
+ def y4_interpret(*args):
283
+ df4 = pd.DataFrame([args], columns=x4.columns)
284
+ shap_values4 = y4_explainer(df4).values
285
+ shap_values4 = np.abs(shap_values4)
286
+ shap.bar_plot(shap_values4[0], max_display = 10, show = False, feature_names = f4_names)
287
+ fig = plt.gcf()
288
+ ax = plt.gca()
289
+ wrap_labels(ax, 20)
290
+ ax.figure
291
+ plt.tight_layout()
292
+ fig.set_figheight(7)
293
+ fig.set_figwidth(9)
294
+ plt.xlabel("SHAP value (impact on model output)", fontsize =12, fontweight = 'heavy', labelpad = 8)
295
+ plt.tick_params(axis="y",direction="out", labelsize = 12)
296
+ plt.tick_params(axis="x",direction="out", labelsize = 12)
297
+ return fig
298
+
299
+
300
+ with gr.Blocks(title = "NCDB-LGG") as demo:
301
+
302
+ gr.Markdown(
303
+ """
304
+ <br/>
305
+ <center><h1>LGG Survival Outcomes</h1></center>
306
+ <center><h2>Prediction Tool</h2></center>
307
+ <center><i>The publication describing the details of this predictive tool will be posted here upon the acceptance of publication.</i></center>
308
+ """
309
+ )
310
+
311
+ gr.Markdown(
312
+ """
313
+ <center><h3>Model Performances</h3></center>
314
+ <div style="text-align:center;">
315
+ <table style="width:100%;">
316
+ <tr>
317
+ <th>Outcome</th>
318
+ <th>Algorithm</th>
319
+ <th>Sensitivity</th>
320
+ <th>Specificity</th>
321
+ <th>Accuracy</th>
322
+ <th>AUPRC</th>
323
+ <th>AUROC</th>
324
+ <th>Brier Score</th>
325
+ </tr>
326
+ <tr>
327
+ <td>12-Month Mortality</td>
328
+ <td>LightGBM</td>
329
+ <td>0.758 (0.745 - 0.771)</td>
330
+ <td>0.836 (0.825 - 0.847)</td>
331
+ <td>0.821 (0.810 - 0.832)</td>
332
+ <td>0.649 (0.635 - 0.663)</td>
333
+ <td>0.889 (0.865 - 0.890)</td>
334
+ <td>0.101 (0.092 - 0.110)</td>
335
+ </tr>
336
+ <tr>
337
+ <td>24-Month Mortality</td>
338
+ <td>LightGBM</td>
339
+ <td>0.771 (0.758 - 0.784)</td>
340
+ <td>0.844 (0.833 - 0.855)</td>
341
+ <td>0.822 (0.811 - 0.833)</td>
342
+ <td>0.783 (0.771 - 0.795)</td>
343
+ <td>0.892 (0.873 - 0.894)</td>
344
+ <td>0.123 (0.113 - 0.133)</td>
345
+ </tr>
346
+ <tr>
347
+ <td>36-Month Mortality</td>
348
+ <td>LightGBM</td>
349
+ <td>0.771 (0.758 - 0.784)</td>
350
+ <td>0.825 (0.813 - 0.837)</td>
351
+ <td>0.803 (0.791 - 0.815)</td>
352
+ <td>0.834 (0.823 - 0.845)</td>
353
+ <td>0.884 (0.865 - 0.886)</td>
354
+ <td>0.139 (0.128 - 0.150)</td>
355
+ </tr>
356
+ <tr>
357
+ <td>60-Month Mortality</td>
358
+ <td>LightGBM</td>
359
+ <td>0.789 (0.775 - 0.803)</td>
360
+ <td>0.819 (0.806 - 0.832)</td>
361
+ <td>0.802 (0.789 - 0.815)</td>
362
+ <td>0.905 (0.895 - 0.915)</td>
363
+ <td>0.870 (0.864 - 0.887)</td>
364
+ <td>0.142 (0.130 - 0.154)</td>
365
+ </tr>
366
+ </table>
367
+ </div>
368
+ """
369
+ )
370
+
371
+ with gr.Row():
372
+
373
+ with gr.Column():
374
+
375
+ Age = gr.Slider(label="Age", minimum = 18, maximum = 99, step = 1, value = 55)
376
+
377
+ Sex = gr.Dropdown(label = "Sex", choices = ['Male', 'Female'], type = 'index', value = 'Male')
378
+
379
+ Race = gr.Dropdown(label = "Race", choices = ['White', 'Black', 'Asian Indian or Pakistani', 'Chinese', 'Filipino', 'American Indian, Aleutian, or Eskimo', 'Vietnamese', 'Korean', 'Other or Unknown'], type = 'index', value = 'White')
380
+
381
+ Hispanic_Ethnicity = gr.Dropdown(label = "Hispanic Ethnicity", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'No')
382
+
383
+ Primary_Payor = gr.Dropdown(label = "Primary Payor", choices = ['Private insurance', 'Medicare', 'Medicaid', 'Other government', 'Not insured', 'Unknown'], type = 'index', value = 'Private insurance')
384
+
385
+ Facility_Type = gr.Dropdown(label = "Facility Type", choices = ['Academic/Research Program', 'Comprehensive Community Cancer Program', 'Integrated Network Cancer Program', 'Community Cancer Program', 'Other or Unknown'], type = 'index', value = 'Academic/Research Program')
386
+
387
+ Facility_Location = gr.Dropdown(label = "Facility Location", choices = ['South Atlantic', 'East North Central', 'Middle Atlantic', 'East North Central', 'Middle Atlantic', 'Pacific', 'West South Central', 'West North Central', 'East South Central', 'New England', 'Mountain', 'Unknown or Other'], type = 'index', value = 'South Atlantic')
388
+
389
+ CharlsonDeyo_Score = gr.Dropdown(label = "Charlson-Deyo Score", choices = ['0', '1', '2', 'Greater than 3'], type = 'index', value = '0')
390
+ Karnofsky_Performance_Scale = gr.Dropdown(label = "Karnofsky Performance Scale", choices = ['KPS 0-20', 'KPS 21-40', 'KPS 41-60', 'KPS 61-80', 'KPS 81-100', 'Unknown'], type = 'index', value = 'KPS 81-100')
391
+
392
+ Laterality = gr.Dropdown(label = "Laterality", choices = ['Right', 'Left', 'Bilateral', 'Midline', 'Unknown'], type = 'index', value = 'Right')
393
+
394
+ Tumor_Localization = gr.Dropdown(label = "Tumor Localization", choices = ['Frontal lobe', 'Temporal lobe', 'Parietal lobe', 'Occipital lobe', 'Overlapping', 'Intraventricular', 'Cerebellum', 'Brain stem', 'Unknown'], type = 'index', value = 'Frontal lobe')
395
+
396
+ Focality = gr.Dropdown(label = "Focality", choices = ['Unifocal', 'Multifocal', 'Unknown'], type = 'index', value = 'Unifocal')
397
+
398
+ Diagnostic_Biopsy = gr.Dropdown(label = "Diagnostic Biopsy", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'No')
399
+
400
+ Tumor_Size = gr.Dropdown(label = "Tumor Size", choices = ['< 2 cm', '2 - 3.9 cm', '4 - 5.9 cm', '6 - 7.9 cm', '8 - 9.9 cm', '10 - 11.9 cm', '12 - 13.9 cm', '14 - 15.9 cm', '16 - 17.9 cm', '18 - 19.9 cm', '> 20 cm', 'Unknown'], type = 'index', value = '< 2 cm')
401
+
402
+ Histology = gr.Dropdown(label = "Histology", choices = ['Astrocytoma', 'Oligodendroglioma', 'Oligoastrocytoma'], type = 'index', value = 'Astrocytoma')
403
+
404
+ Grade = gr.Dropdown(label = "Grade", choices = ['Grade II', 'Grade III'], type = 'index', value = 'Grade II')
405
+
406
+ CoDeletion_1p19q = gr.Dropdown(label = "1p19q Co-Deletion", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'No')
407
+
408
+ MGMT_Methylation = gr.Dropdown(label = "MGMT Methylation", choices = ['Unmethylated', 'Methylated', 'Unknown'], type = 'index', value = 'Unmethylated')
409
+
410
+ Ki67_Labeling_Index = gr.Dropdown(label = 'Ki-67 Labeling Index', choices = ['0-20%', '21-40%', '41-60%', '61-80%', '81-100%', 'Normal (no percentage available)', 'Slightly elevated (no percentage available)', 'Elevated (no percentage available)', 'Unknown'], type = 'index', value = '0-20%')
411
+
412
+ Resective_Surgery = gr.Dropdown(label = "Resective Surgery", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'Yes')
413
+
414
+ Extent_of_Resection = gr.Dropdown(label = "Extent of Resection", choices = ['No resective surgery was performed', 'Gross total resection', 'Subtotal resection', 'Unknown'], type = 'index', value = 'Gross total resection')
415
+
416
+ Radiation_Treatment = gr.Dropdown(label = "Radiation Treatment", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'Yes')
417
+
418
+ Chemotherapy = gr.Dropdown(label = "Chemotherapy", choices = ['No', 'Yes (single-agent chemotherapy)', 'Yes (multi-agent chemotherapy)', 'Yes (details unknown)', 'Unknown'], type = 'index', value = 'No')
419
+
420
+ Immunotherapy = gr.Dropdown(label = "Immunotherapy", choices = ['No', 'Yes', 'Unknown'], type = 'index', value = 'No')
421
+
422
+ with gr.Column():
423
+
424
+ with gr.Box():
425
+
426
+ gr.Markdown(
427
+ """
428
+ <center> <h2>12-Month Survival</h2> </center>
429
+ <br/>
430
+ <center> This model uses the LightGBM algorithm.</center>
431
+ <br/>
432
+ """
433
+ )
434
+
435
+ with gr.Row():
436
+ y1_predict_btn = gr.Button(value="Predict")
437
+
438
+ gr.Markdown(
439
+ """
440
+ <br/>
441
+ """
442
+ )
443
+
444
+ label1 = gr.Markdown()
445
+
446
+ gr.Markdown(
447
+ """
448
+ <br/>
449
+ """
450
+ )
451
+
452
+ with gr.Row():
453
+ y1_interpret_btn = gr.Button(value="Explain")
454
+
455
+ gr.Markdown(
456
+ """
457
+ <br/>
458
+ """
459
+ )
460
+
461
+ plot1 = gr.Plot()
462
+
463
+ gr.Markdown(
464
+ """
465
+ <br/>
466
+ """
467
+ )
468
+
469
+ with gr.Box():
470
+ gr.Markdown(
471
+ """
472
+ <center> <h2>24-Month Survival</h2> </center>
473
+ <br/>
474
+ <center> This model uses the LightGBM algorithm.</center>
475
+ <br/>
476
+ """
477
+ )
478
+
479
+ with gr.Row():
480
+ y2_predict_btn = gr.Button(value="Predict")
481
+
482
+ gr.Markdown(
483
+ """
484
+ <br/>
485
+ """
486
+ )
487
+
488
+ label2 = gr.Markdown()
489
+
490
+ gr.Markdown(
491
+ """
492
+ <br/>
493
+ """
494
+ )
495
+
496
+ with gr.Row():
497
+ y2_interpret_btn = gr.Button(value="Explain")
498
+
499
+ gr.Markdown(
500
+ """
501
+ <br/>
502
+ """
503
+ )
504
+
505
+ plot2 = gr.Plot()
506
+
507
+ gr.Markdown(
508
+ """
509
+ <br/>
510
+ """
511
+ )
512
+
513
+ with gr.Box():
514
+
515
+ gr.Markdown(
516
+ """
517
+ <center> <h2>36-Month Survival</h2> </center>
518
+ <br/>
519
+ <center> This model uses the LightGBM algorithm.</center>
520
+ <br/>
521
+ """
522
+ )
523
+
524
+ with gr.Row():
525
+ y3_predict_btn = gr.Button(value="Predict")
526
+
527
+ gr.Markdown(
528
+ """
529
+ <br/>
530
+ """
531
+ )
532
+
533
+ label3 = gr.Markdown()
534
+
535
+ gr.Markdown(
536
+ """
537
+ <br/>
538
+ """
539
+ )
540
+
541
+ with gr.Row():
542
+ y3_interpret_btn = gr.Button(value="Explain")
543
+
544
+ gr.Markdown(
545
+ """
546
+ <br/>
547
+ """
548
+ )
549
+
550
+ plot3 = gr.Plot()
551
+
552
+ gr.Markdown(
553
+ """
554
+ <br/>
555
+ """
556
+ )
557
+
558
+ with gr.Box():
559
+
560
+ gr.Markdown(
561
+ """
562
+ <center> <h2>60-Month Survival</h2> </center>
563
+ <br/>
564
+ <center> This model uses the LightGBM algorithm.</center>
565
+ <br/>
566
+ """
567
+ )
568
+
569
+ with gr.Row():
570
+ y4_predict_btn = gr.Button(value="Predict")
571
+
572
+ gr.Markdown(
573
+ """
574
+ <br/>
575
+ """
576
+ )
577
+
578
+ label4 = gr.Markdown()
579
+
580
+ gr.Markdown(
581
+ """
582
+ <br/>
583
+ """
584
+ )
585
+
586
+ with gr.Row():
587
+ y4_interpret_btn = gr.Button(value="Explain")
588
+
589
+ gr.Markdown(
590
+ """
591
+ <br/>
592
+ """
593
+ )
594
+
595
+ plot4 = gr.Plot()
596
+
597
+ gr.Markdown(
598
+ """
599
+ <br/>
600
+ """
601
+ )
602
+
603
+
604
+ y1_predict_btn.click(
605
+ y1_predict,
606
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
607
+ outputs = [label1]
608
+ )
609
+
610
+ y2_predict_btn.click(
611
+ y2_predict,
612
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
613
+ outputs = [label2]
614
+ )
615
+
616
+ y3_predict_btn.click(
617
+ y3_predict,
618
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
619
+ outputs = [label3]
620
+ )
621
+
622
+ y4_predict_btn.click(
623
+ y4_predict,
624
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
625
+ outputs = [label4]
626
+ )
627
+
628
+ y1_interpret_btn.click(
629
+ y1_interpret,
630
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
631
+ outputs = [plot1],
632
+ )
633
+
634
+ y2_interpret_btn.click(
635
+ y2_interpret,
636
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
637
+ outputs = [plot2],
638
+ )
639
+
640
+ y3_interpret_btn.click(
641
+ y3_interpret,
642
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
643
+ outputs = [plot3],
644
+ )
645
+
646
+ y4_interpret_btn.click(
647
+ y4_interpret,
648
+ inputs = [Facility_Type,Facility_Location,Age,Sex,Race,Hispanic_Ethnicity,Primary_Payor,CharlsonDeyo_Score,Histology,Tumor_Localization,Laterality,Diagnostic_Biopsy,Grade,Ki67_Labeling_Index,Karnofsky_Performance_Scale,MGMT_Methylation,Focality,Tumor_Size,Chemotherapy,Immunotherapy,CoDeletion_1p19q,Resective_Surgery,Extent_of_Resection,Radiation_Treatment],
649
+ outputs = [plot4],
650
+ )
651
+
652
+ gr.Markdown(
653
+ """
654
+ <center><h2>Disclaimer</h2></center>
655
+ <center>
656
+ The data utilized for this tool is sourced from the Commission on Cancer (CoC) of the American College of Surgeons and the American Cancer Society. These institutions, however, have not verified the information and are not responsible for the statistical validity of the data analysis or the conclusions drawn by the authors. This predictive tool, available on this webpage, is designed to provide general health information only and is not a substitute for professional medical advice, diagnosis, or treatment. It is strongly recommended that users consult with their own healthcare provider for any health-related concerns or issues. The authors make no warranties or representations, express or implied, regarding the accuracy, timeliness, relevance, or utility of the information contained in this tool. The health information in the prediction tool is subject to change and can be affected by various confounders, therefore it may be outdated, incomplete, or incorrect. No doctor-patient relationship is created by using this prediction tool and the authors have not validated its content. The authors do not record any specific user information or initiate contact with users. Before making any healthcare decisions or taking or refraining from any action based on the information in this prediction tool, it is advisable to seek professional advice from a healthcare provider. By using the prediction tool, users acknowledge and agree that neither the authors nor any other party will be liable for any decisions made, actions taken or not taken as a result of the information provided herein.
657
+ <br/>
658
+ <h4>By using this tool, you accept all of the above terms.</h4>
659
+ </center>
660
+ """
661
+ )
662
+
663
+ demo.launch()
m12_data_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
m12_data_valid.csv ADDED
The diff for this file is too large to render. See raw diff
 
m24_data_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
m24_data_valid.csv ADDED
The diff for this file is too large to render. See raw diff
 
m36_data_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
m36_data_valid.csv ADDED
The diff for this file is too large to render. See raw diff
 
m60_data_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
m60_data_valid.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.23
2
+ matplotlib
3
+ xgboost
4
+ catboost
5
+ lightgbm
6
+ shap
7
+ randomgen
8
+ optuna
9
+ scipy