mertkarabacak committed on
Commit
e7701fa
1 Parent(s): 9e1a964

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -76
app.py CHANGED
@@ -140,88 +140,47 @@ unique_PRIMARYMETHODPAYMENT = ['Private/commercial insurance', 'Medicaid', 'Medi
140
  #Prepare data for the outcome 1 (mortality).
141
  y1 = x1.pop('OUTCOME')
142
  categorical_columns1 = list(x1.select_dtypes('object').columns)
143
- x1 = x1.astype({col: "category" for col in categorical_columns1})
144
- y1_data_xgb = xgb.DMatrix(x1, label=y1, enable_categorical=True)
145
- x1_lgb = x1.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
146
- y1_data_lgb = lgb.Dataset(x1_lgb, label=y1)
147
- y1_data_cb = Pool(data=x1, label=y1, cat_features=categorical_columns1)
148
- x1_rf = x1
149
- categorical_columns1 = list(x1_rf.select_dtypes('category').columns)
150
- x1_rf = x1_rf.astype({col: "category" for col in categorical_columns1})
151
  le = sklearn.preprocessing.LabelEncoder()
152
  for col in categorical_columns1:
153
- x1_rf[col] = le.fit_transform(x1_rf[col].astype(str))
154
- d1 = dict.fromkeys(x1_rf.select_dtypes(np.int64).columns, str)
155
- x1_rf = x1_rf.astype(d1)
156
 
157
  #Prepare data for the outcome 2 (discharge).
158
  y2 = x2.pop('OUTCOME')
159
  categorical_columns2 = list(x2.select_dtypes('object').columns)
160
- x2 = x2.astype({col: "category" for col in categorical_columns2})
161
- y2_data_xgb = xgb.DMatrix(x2, label=y2, enable_categorical=True)
162
- x2_lgb = x2.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
163
- y2_data_lgb = lgb.Dataset(x2_lgb, label=y2)
164
- y2_data_cb = Pool(data=x2, label=y2, cat_features=categorical_columns2)
165
- x2_rf = x2
166
- categorical_columns2 = list(x2_rf.select_dtypes('category').columns)
167
- x2_rf = x2_rf.astype({col: "category" for col in categorical_columns2})
168
  le = sklearn.preprocessing.LabelEncoder()
169
  for col in categorical_columns2:
170
- x2_rf[col] = le.fit_transform(x2_rf[col].astype(str))
171
- d2 = dict.fromkeys(x2_rf.select_dtypes(np.int64).columns, str)
172
- x2_rf = x2_rf.astype(d2)
173
 
174
  #Prepare data for the outcome 3 (LOS).
175
  y3 = x3.pop('OUTCOME')
176
  categorical_columns3 = list(x3.select_dtypes('object').columns)
177
- x3 = x3.astype({col: "category" for col in categorical_columns3})
178
- y3_data_xgb = xgb.DMatrix(x3, label=y3, enable_categorical=True)
179
- x3_lgb = x3.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
180
- y3_data_lgb = lgb.Dataset(x3_lgb, label=y3)
181
- y3_data_cb = Pool(data=x3, label=y3, cat_features=categorical_columns3)
182
- x3_rf = x3
183
- categorical_columns3 = list(x3_rf.select_dtypes('category').columns)
184
- x3_rf = x3_rf.astype({col: "category" for col in categorical_columns3})
185
  le = sklearn.preprocessing.LabelEncoder()
186
  for col in categorical_columns3:
187
- x3_rf[col] = le.fit_transform(x3_rf[col].astype(str))
188
- d3 = dict.fromkeys(x3_rf.select_dtypes(np.int64).columns, str)
189
- x3_rf = x3_rf.astype(d3)
190
 
191
  #Prepare data for the outcome 4 (ICU LOS).
192
  y4 = x4.pop('OUTCOME')
193
  categorical_columns4 = list(x4.select_dtypes('object').columns)
194
- x4 = x4.astype({col: "category" for col in categorical_columns4})
195
- y4_data_xgb = xgb.DMatrix(x4, label=y4, enable_categorical=True)
196
- x4_lgb = x4.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
197
- y4_data_lgb = lgb.Dataset(x4_lgb, label=y4)
198
- y4_data_cb = Pool(data=x4, label=y4, cat_features=categorical_columns4)
199
- x4_rf = x4
200
- categorical_columns4 = list(x4_rf.select_dtypes('category').columns)
201
- x4_rf = x4_rf.astype({col: "category" for col in categorical_columns4})
202
  le = sklearn.preprocessing.LabelEncoder()
203
  for col in categorical_columns4:
204
- x4_rf[col] = le.fit_transform(x4_rf[col].astype(str))
205
- d4 = dict.fromkeys(x4_rf.select_dtypes(np.int64).columns, str)
206
- x4_rf = x4_rf.astype(d4)
207
 
208
  #Prepare data for the outcome 5 (complications).
209
  y5 = x5.pop('OUTCOME')
210
  categorical_columns5 = list(x5.select_dtypes('object').columns)
211
- x5 = x5.astype({col: "category" for col in categorical_columns5})
212
- y5_data_xgb = xgb.DMatrix(x5, label=y5, enable_categorical=True)
213
- x5_lgb = x5.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
214
- y5_data_lgb = lgb.Dataset(x5_lgb, label=y5)
215
- y5_data_cb = Pool(data=x5, label=y5, cat_features=categorical_columns5)
216
- x5_rf = x5
217
- categorical_columns5 = list(x5_rf.select_dtypes('category').columns)
218
- x5_rf = x5_rf.astype({col: "category" for col in categorical_columns5})
219
  le = sklearn.preprocessing.LabelEncoder()
220
  for col in categorical_columns5:
221
- x5_rf[col] = le.fit_transform(x5_rf[col].astype(str))
222
- d5 = dict.fromkeys(x5_rf.select_dtypes(np.int64).columns, str)
223
- x5_rf = x5_rf.astype(d5)
224
-
225
 
226
  #Assign hyperparameters.
227
  y1_params = {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 0.5059844209148782, 'alpha': 0.0030156848979492556, 'max_depth': 2, 'eta': 4.546875002603483e-07, 'gamma': 1.1982641538268563e-08, 'grow_policy': 'lossguide', 'eval_metric': 'auc', 'verbosity': 0, 'seed': 31}
@@ -233,36 +192,47 @@ y5_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.001
233
 
234
 
235
  #Training models.
236
- y1_model_xgb = xgb.train(params = y1_params, dtrain = y1_data_xgb)
237
- y1_explainer_xgb = shap.TreeExplainer(y1_model_xgb)
 
 
238
 
239
  from sklearn.ensemble import RandomForestClassifier as rf
240
  y2_rf = rf(**y2_params)
241
- y2_model_rf = y2_rf.fit(x2_rf, y2)
242
  y2_explainer_rf = shap.TreeExplainer(y2_model_rf)
243
 
244
- y3_model_xgb = xgb.train(params = y3_params, dtrain = y3_data_xgb)
245
- y3_explainer_xgb = shap.TreeExplainer(y1_model_xgb)
 
 
246
 
247
- y4_model_lgb = lgb.train(params = y4_params, train_set = y4_data_lgb)
 
 
248
  y4_explainer_lgb = shap.TreeExplainer(y4_model_lgb)
249
 
250
- y5_model_lgb = lgb.train(params=y5_params, train_set = y5_data_lgb)
 
 
251
  y5_explainer_lgb = shap.TreeExplainer(y5_model_lgb)
252
 
253
-
254
  #Define predict for y1 (mortality).
255
  def y1_predict_xgb(*args):
256
  df1 = pd.DataFrame([args], columns=x1.columns)
257
- df1 = df1.astype({col: "category" for col in categorical_columns1})
258
- pos_pred = y1_model_xgb.predict_proba(xgb.DMatrix(df1, enable_categorical=True))
259
- return {"Mortality": float(pos_pred[0]), "No Mortality": 1 - float(pos_pred[0])}
 
 
260
 
261
  def y1_predict_lgb(*args):
262
- df1 = pd.DataFrame([args], columns=x1_lgb.columns)
263
- df1 = df1.astype({col: "category" for col in categorical_columns1})
264
- pos_pred = y1_model_lgb.predict(df1)
265
- return {"Mortality": float(pos_pred[0]), "No Mortality": 1 - float(pos_pred[0])}
 
 
266
 
267
  def y1_predict_cb(*args):
268
  df1 = pd.DataFrame([args], columns=x1.columns)
@@ -274,7 +244,7 @@ def y1_predict_rf(*args):
274
  df1 = pd.DataFrame([args], columns=x1_rf.columns)
275
  df1 = df.astype({col: "category" for col in categorical_columns1})
276
  d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
277
- d1f = df1.astype(d1)
278
  pos_pred = y1_model_rf.predict_proba(df1)
279
  return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
280
 
@@ -282,7 +252,7 @@ def y1_predict_rf(*args):
282
  def y2_predict_xgb(*args):
283
  df2 = pd.DataFrame([args], columns=x2.columns)
284
  df2 = df2.astype({col: "category" for col in categorical_columns2})
285
- pos_pred = y2_model_xgb.predict_proba(xgb.DMatrix(df2, enable_categorical=True))
286
  return {"Facility Discharge": float(pos_pred[0]), "Home Discharge": 1 - float(pos_pred[0])}
287
 
288
  def y2_predict_lgb(*args):
@@ -309,7 +279,7 @@ def y2_predict_rf(*args):
309
  def y3_predict_xgb(*args):
310
  df3 = pd.DataFrame([args], columns=x3.columns)
311
  df3 = df3.astype({col: "category" for col in categorical_columns3})
312
- pos_pred = y3_model_xgb.predict_proba(xgb.DMatrix(df3, enable_categorical=True))
313
  return {"Prolonged LOS": float(pos_pred[0]), "No Prolonged LOS": 1 - float(pos_pred[0])}
314
 
315
  def y3_predict_lgb(*args):
@@ -336,7 +306,7 @@ def y3_predict_rf(*args):
336
  def y4_predict_xgb(*args):
337
  df4 = pd.DataFrame([args], columns=x4.columns)
338
  df4 = df4.astype({col: "category" for col in categorical_columns4})
339
- pos_pred = y4_model_xgb.predict_proba(xgb.DMatrix(df4, enable_categorical=True))
340
  return {"Prolonged ICU LOS": float(pos_pred[0]), "No Prolonged ICU LOS": 1 - float(pos_pred[0])}
341
 
342
  def y4_predict_lgb(*args):
@@ -363,7 +333,7 @@ def y4_predict_rf(*args):
363
  def y5_predict_xgb(*args):
364
  df5 = pd.DataFrame([args], columns=x5.columns)
365
  df5 = df5.astype({col: "category" for col in categorical_columns5})
366
- pos_pred = y5_model_xgb.predict_proba(xgb.DMatrix(df5, enable_categorical=True))
367
  return {"Major Complications": float(pos_pred[0]), "No Major Complications": 1 - float(pos_pred[0])}
368
 
369
  def y5_predict_lgb(*args):
 
#Prepare data for the outcome 1 (mortality).
#Split off the target column, then label-encode every object-typed feature in place.
y1 = x1.pop('OUTCOME')
categorical_columns1 = list(x1.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns1:
    #Encode the column of the frame being prepared (previously indexed ``rf``, not ``x1``).
    x1[col] = le.fit_transform(x1[col].astype(str))
#NOTE(review): every int64 column (label-encoded ones included) is cast to str here;
#confirm the downstream estimators accept string-typed columns.
d1 = dict.fromkeys(x1.select_dtypes(np.int64).columns, str)
x1 = x1.astype(d1)
#Alias retained because y1_predict_rf still builds its frame from x1_rf.columns.
x1_rf = x1

#Prepare data for the outcome 2 (discharge).
y2 = x2.pop('OUTCOME')
categorical_columns2 = list(x2.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns2:
    x2[col] = le.fit_transform(x2[col].astype(str))
d2 = dict.fromkeys(x2.select_dtypes(np.int64).columns, str)
x2 = x2.astype(d2)
#Alias retained for code that may still refer to x2_rf (presumably y2_predict_rf; verify).
x2_rf = x2

#Prepare data for the outcome 3 (LOS).
y3 = x3.pop('OUTCOME')
categorical_columns3 = list(x3.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns3:
    x3[col] = le.fit_transform(x3[col].astype(str))
d3 = dict.fromkeys(x3.select_dtypes(np.int64).columns, str)
x3 = x3.astype(d3)
#Alias retained for code that may still refer to x3_rf (presumably y3_predict_rf; verify).
x3_rf = x3

#Prepare data for the outcome 4 (ICU LOS).
y4 = x4.pop('OUTCOME')
categorical_columns4 = list(x4.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns4:
    x4[col] = le.fit_transform(x4[col].astype(str))
d4 = dict.fromkeys(x4.select_dtypes(np.int64).columns, str)
x4 = x4.astype(d4)
#Alias retained for code that may still refer to x4_rf (presumably y4_predict_rf; verify).
x4_rf = x4

#Prepare data for the outcome 5 (complications).
y5 = x5.pop('OUTCOME')
categorical_columns5 = list(x5.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns5:
    x5[col] = le.fit_transform(x5[col].astype(str))
d5 = dict.fromkeys(x5.select_dtypes(np.int64).columns, str)
x5 = x5.astype(d5)
#Alias retained for code that may still refer to x5_rf (presumably y5_predict_rf; verify).
x5_rf = x5


#Assign hyperparameters.
y1_params = {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 0.5059844209148782, 'alpha': 0.0030156848979492556, 'max_depth': 2, 'eta': 4.546875002603483e-07, 'gamma': 1.1982641538268563e-08, 'grow_policy': 'lossguide', 'eval_metric': 'auc', 'verbosity': 0, 'seed': 31}
 
192
 
193
 
#Training models.
#Each outcome gets one fitted estimator and a SHAP TreeExplainer built from that same model.
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier as rf
from xgboost import XGBClassifier

#Outcome 1 (mortality): XGBoost classifier.
y1_xgb = XGBClassifier(**y1_params)
y1_model_xgb = y1_xgb.fit(x1, y1)
#Explain the model trained just above (previously pointed at y2_model_xgb, which is not trained here).
y1_explainer_xgb = shap.TreeExplainer(y1_model_xgb)

#Outcome 2 (discharge): random forest classifier.
y2_rf = rf(**y2_params)
y2_model_rf = y2_rf.fit(x2, y2)
y2_explainer_rf = shap.TreeExplainer(y2_model_rf)

#Outcome 3 (LOS): XGBoost classifier.
y3_xgb = XGBClassifier(**y3_params)
y3_model_xgb = y3_xgb.fit(x3, y3)
y3_explainer_xgb = shap.TreeExplainer(y3_model_xgb)

#Outcome 4 (ICU LOS): LightGBM classifier.
#Bound to y4_lgb rather than ``lgb`` so the estimator does not shadow the name ``lgb``
#(presumably the lightgbm module alias imported at the top of the file; verify).
y4_lgb = LGBMClassifier(**y4_params)
y4_model_lgb = y4_lgb.fit(x4, y4)
y4_explainer_lgb = shap.TreeExplainer(y4_model_lgb)

#Outcome 5 (complications): LightGBM classifier.
y5_lgb = LGBMClassifier(**y5_params)
y5_model_lgb = y5_lgb.fit(x5, y5)
y5_explainer_lgb = shap.TreeExplainer(y5_model_lgb)
219
 
 
#Define predict for y1 (mortality).
def y1_predict_xgb(*args):
    """Return the mortality class probabilities from ``y1_model_xgb`` for one case.

    Args:
        *args: One value per column of ``x1``, in column order.

    Returns:
        dict: ``{"Mortality": ..., "No Mortality": ...}`` taken from columns 1 and 0
        of the first row returned by ``y1_model_xgb.predict_proba``.
    """
    df1 = pd.DataFrame([args], columns=x1.columns)
    #Cast the single-row frame built above (previously ``df.astype``, which ignored it).
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    #Narrow any int64 columns to int32 before predicting.
    d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
    df1 = df1.astype(d1)
    #NOTE(review): training label-encodes the categorical columns, while this path casts
    #them to pandas "category"; confirm the model accepts this representation.
    pos_pred = y1_model_xgb.predict_proba(df1)
    return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
228
 
def y1_predict_lgb(*args):
    """Return the mortality class probabilities from ``y1_model_lgb`` for one case.

    Args:
        *args: One value per column of ``x1``, in column order.

    Returns:
        dict: ``{"Mortality": ..., "No Mortality": ...}`` taken from columns 1 and 0
        of the first row returned by ``y1_model_lgb.predict_proba``.
    """
    df1 = pd.DataFrame([args], columns=x1.columns)
    #Cast the single-row frame built above (previously ``df.astype``, which ignored it).
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    #Narrow any int64 columns to int32 before predicting.
    d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
    df1 = df1.astype(d1)
    #NOTE(review): no y1_model_lgb is fitted in the visible training section; confirm it
    #is defined elsewhere in the file before exposing this predictor.
    pos_pred = y1_model_lgb.predict_proba(df1)
    return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
236
 
237
  def y1_predict_cb(*args):
238
  df1 = pd.DataFrame([args], columns=x1.columns)
 
244
  df1 = pd.DataFrame([args], columns=x1_rf.columns)
245
  df1 = df.astype({col: "category" for col in categorical_columns1})
246
  d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
247
+ df1 = df1.astype(d1)
248
  pos_pred = y1_model_rf.predict_proba(df1)
249
  return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
250
 
 
def y2_predict_xgb(*args):
    """Score one case with ``y2_model_xgb`` and report the discharge outcome split.

    Args:
        *args: One value per column of ``x2``, in column order.

    Returns:
        dict: ``"Facility Discharge"`` mapped to the first predicted value and
        ``"Home Discharge"`` mapped to one minus that value.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    #Mark the categorical columns as pandas categories so the DMatrix can consume them.
    category_dtypes = dict.fromkeys(categorical_columns2, "category")
    row = row.astype(category_dtypes)
    matrix = xgb.DMatrix(row, enable_categorical=True)
    prediction = y2_model_xgb.predict(matrix)
    return {
        "Facility Discharge": float(prediction[0]),
        "Home Discharge": 1 - float(prediction[0]),
    }
257
 
258
  def y2_predict_lgb(*args):
 
def y3_predict_xgb(*args):
    """Score one case with ``y3_model_xgb`` and report the length-of-stay outcome split.

    Args:
        *args: One value per column of ``x3``, in column order.

    Returns:
        dict: ``"Prolonged LOS"`` mapped to the first predicted value and
        ``"No Prolonged LOS"`` mapped to one minus that value.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    #Mark the categorical columns as pandas categories so the DMatrix can consume them.
    category_dtypes = dict.fromkeys(categorical_columns3, "category")
    row = row.astype(category_dtypes)
    matrix = xgb.DMatrix(row, enable_categorical=True)
    prediction = y3_model_xgb.predict(matrix)
    return {
        "Prolonged LOS": float(prediction[0]),
        "No Prolonged LOS": 1 - float(prediction[0]),
    }
284
 
285
  def y3_predict_lgb(*args):
 
def y4_predict_xgb(*args):
    """Score one case with ``y4_model_xgb`` and report the ICU length-of-stay outcome split.

    Args:
        *args: One value per column of ``x4``, in column order.

    Returns:
        dict: ``"Prolonged ICU LOS"`` mapped to the first predicted value and
        ``"No Prolonged ICU LOS"`` mapped to one minus that value.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    #Mark the categorical columns as pandas categories so the DMatrix can consume them.
    category_dtypes = dict.fromkeys(categorical_columns4, "category")
    row = row.astype(category_dtypes)
    matrix = xgb.DMatrix(row, enable_categorical=True)
    prediction = y4_model_xgb.predict(matrix)
    return {
        "Prolonged ICU LOS": float(prediction[0]),
        "No Prolonged ICU LOS": 1 - float(prediction[0]),
    }
311
 
312
  def y4_predict_lgb(*args):
 
def y5_predict_xgb(*args):
    """Score one case with ``y5_model_xgb`` and report the complications outcome split.

    Args:
        *args: One value per column of ``x5``, in column order.

    Returns:
        dict: ``"Major Complications"`` mapped to the first predicted value and
        ``"No Major Complications"`` mapped to one minus that value.
    """
    row = pd.DataFrame([args], columns=x5.columns)
    #Mark the categorical columns as pandas categories so the DMatrix can consume them.
    category_dtypes = dict.fromkeys(categorical_columns5, "category")
    row = row.astype(category_dtypes)
    matrix = xgb.DMatrix(row, enable_categorical=True)
    prediction = y5_model_xgb.predict(matrix)
    return {
        "Major Complications": float(prediction[0]),
        "No Major Complications": 1 - float(prediction[0]),
    }
338
 
339
  def y5_predict_lgb(*args):