mertkarabacak
committed on
Commit
•
e7701fa
1
Parent(s):
9e1a964
Upload app.py
Browse files
app.py
CHANGED
@@ -140,88 +140,47 @@ unique_PRIMARYMETHODPAYMENT = ['Private/commercial insurance', 'Medicaid', 'Medi
|
|
140 |
#Prepare data for the outcome 1 (mortality).
|
141 |
y1 = x1.pop('OUTCOME')
|
142 |
categorical_columns1 = list(x1.select_dtypes('object').columns)
|
143 |
-
x1 = x1.astype({col: "category" for col in categorical_columns1})
|
144 |
-
y1_data_xgb = xgb.DMatrix(x1, label=y1, enable_categorical=True)
|
145 |
-
x1_lgb = x1.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
|
146 |
-
y1_data_lgb = lgb.Dataset(x1_lgb, label=y1)
|
147 |
-
y1_data_cb = Pool(data=x1, label=y1, cat_features=categorical_columns1)
|
148 |
-
x1_rf = x1
|
149 |
-
categorical_columns1 = list(x1_rf.select_dtypes('category').columns)
|
150 |
-
x1_rf = x1_rf.astype({col: "category" for col in categorical_columns1})
|
151 |
le = sklearn.preprocessing.LabelEncoder()
|
152 |
for col in categorical_columns1:
|
153 |
-
|
154 |
-
d1 = dict.fromkeys(
|
155 |
-
|
156 |
|
157 |
#Prepare data for the outcome 2 (discharge).
|
158 |
y2 = x2.pop('OUTCOME')
|
159 |
categorical_columns2 = list(x2.select_dtypes('object').columns)
|
160 |
-
x2 = x2.astype({col: "category" for col in categorical_columns2})
|
161 |
-
y2_data_xgb = xgb.DMatrix(x2, label=y2, enable_categorical=True)
|
162 |
-
x2_lgb = x2.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
|
163 |
-
y2_data_lgb = lgb.Dataset(x2_lgb, label=y2)
|
164 |
-
y2_data_cb = Pool(data=x2, label=y2, cat_features=categorical_columns2)
|
165 |
-
x2_rf = x2
|
166 |
-
categorical_columns2 = list(x2_rf.select_dtypes('category').columns)
|
167 |
-
x2_rf = x2_rf.astype({col: "category" for col in categorical_columns2})
|
168 |
le = sklearn.preprocessing.LabelEncoder()
|
169 |
for col in categorical_columns2:
|
170 |
-
|
171 |
-
d2 = dict.fromkeys(
|
172 |
-
|
173 |
|
174 |
#Prepare data for the outcome 3 (LOS).
|
175 |
y3 = x3.pop('OUTCOME')
|
176 |
categorical_columns3 = list(x3.select_dtypes('object').columns)
|
177 |
-
x3 = x3.astype({col: "category" for col in categorical_columns3})
|
178 |
-
y3_data_xgb = xgb.DMatrix(x3, label=y3, enable_categorical=True)
|
179 |
-
x3_lgb = x3.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
|
180 |
-
y3_data_lgb = lgb.Dataset(x3_lgb, label=y3)
|
181 |
-
y3_data_cb = Pool(data=x3, label=y3, cat_features=categorical_columns3)
|
182 |
-
x3_rf = x3
|
183 |
-
categorical_columns3 = list(x3_rf.select_dtypes('category').columns)
|
184 |
-
x3_rf = x3_rf.astype({col: "category" for col in categorical_columns3})
|
185 |
le = sklearn.preprocessing.LabelEncoder()
|
186 |
for col in categorical_columns3:
|
187 |
-
|
188 |
-
d3 = dict.fromkeys(
|
189 |
-
|
190 |
|
191 |
#Prepare data for the outcome 4 (ICU LOS).
|
192 |
y4 = x4.pop('OUTCOME')
|
193 |
categorical_columns4 = list(x4.select_dtypes('object').columns)
|
194 |
-
x4 = x4.astype({col: "category" for col in categorical_columns4})
|
195 |
-
y4_data_xgb = xgb.DMatrix(x4, label=y4, enable_categorical=True)
|
196 |
-
x4_lgb = x4.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
|
197 |
-
y4_data_lgb = lgb.Dataset(x4_lgb, label=y4)
|
198 |
-
y4_data_cb = Pool(data=x4, label=y4, cat_features=categorical_columns4)
|
199 |
-
x4_rf = x4
|
200 |
-
categorical_columns4 = list(x4_rf.select_dtypes('category').columns)
|
201 |
-
x4_rf = x4_rf.astype({col: "category" for col in categorical_columns4})
|
202 |
le = sklearn.preprocessing.LabelEncoder()
|
203 |
for col in categorical_columns4:
|
204 |
-
|
205 |
-
d4 = dict.fromkeys(
|
206 |
-
|
207 |
|
208 |
#Prepare data for the outcome 5 (complications).
|
209 |
y5 = x5.pop('OUTCOME')
|
210 |
categorical_columns5 = list(x5.select_dtypes('object').columns)
|
211 |
-
x5 = x5.astype({col: "category" for col in categorical_columns5})
|
212 |
-
y5_data_xgb = xgb.DMatrix(x5, label=y5, enable_categorical=True)
|
213 |
-
x5_lgb = x5.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
|
214 |
-
y5_data_lgb = lgb.Dataset(x5_lgb, label=y5)
|
215 |
-
y5_data_cb = Pool(data=x5, label=y5, cat_features=categorical_columns5)
|
216 |
-
x5_rf = x5
|
217 |
-
categorical_columns5 = list(x5_rf.select_dtypes('category').columns)
|
218 |
-
x5_rf = x5_rf.astype({col: "category" for col in categorical_columns5})
|
219 |
le = sklearn.preprocessing.LabelEncoder()
|
220 |
for col in categorical_columns5:
|
221 |
-
|
222 |
-
d5 = dict.fromkeys(
|
223 |
-
|
224 |
-
|
225 |
|
226 |
#Assign hyperparameters.
|
227 |
y1_params = {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 0.5059844209148782, 'alpha': 0.0030156848979492556, 'max_depth': 2, 'eta': 4.546875002603483e-07, 'gamma': 1.1982641538268563e-08, 'grow_policy': 'lossguide', 'eval_metric': 'auc', 'verbosity': 0, 'seed': 31}
|
@@ -233,36 +192,47 @@ y5_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.001
|
|
233 |
|
234 |
|
235 |
#Training models.
|
236 |
-
|
237 |
-
|
|
|
|
|
238 |
|
239 |
from sklearn.ensemble import RandomForestClassifier as rf
|
240 |
y2_rf = rf(**y2_params)
|
241 |
-
y2_model_rf = y2_rf.fit(
|
242 |
y2_explainer_rf = shap.TreeExplainer(y2_model_rf)
|
243 |
|
244 |
-
|
245 |
-
|
|
|
|
|
246 |
|
247 |
-
|
|
|
|
|
248 |
y4_explainer_lgb = shap.TreeExplainer(y4_model_lgb)
|
249 |
|
250 |
-
|
|
|
|
|
251 |
y5_explainer_lgb = shap.TreeExplainer(y5_model_lgb)
|
252 |
|
253 |
-
|
254 |
#Define predict for y1 (mortality).
|
255 |
def y1_predict_xgb(*args):
|
256 |
df1 = pd.DataFrame([args], columns=x1.columns)
|
257 |
-
df1 =
|
258 |
-
|
259 |
-
|
|
|
|
|
260 |
|
261 |
def y1_predict_lgb(*args):
|
262 |
-
df1 = pd.DataFrame([args], columns=
|
263 |
-
df1 =
|
264 |
-
|
265 |
-
|
|
|
|
|
266 |
|
267 |
def y1_predict_cb(*args):
|
268 |
df1 = pd.DataFrame([args], columns=x1.columns)
|
@@ -274,7 +244,7 @@ def y1_predict_rf(*args):
|
|
274 |
df1 = pd.DataFrame([args], columns=x1_rf.columns)
|
275 |
df1 = df.astype({col: "category" for col in categorical_columns1})
|
276 |
d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
|
277 |
-
|
278 |
pos_pred = y1_model_rf.predict_proba(df1)
|
279 |
return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
|
280 |
|
@@ -282,7 +252,7 @@ def y1_predict_rf(*args):
|
|
282 |
def y2_predict_xgb(*args):
|
283 |
df2 = pd.DataFrame([args], columns=x2.columns)
|
284 |
df2 = df2.astype({col: "category" for col in categorical_columns2})
|
285 |
-
pos_pred = y2_model_xgb.
|
286 |
return {"Facility Discharge": float(pos_pred[0]), "Home Discharge": 1 - float(pos_pred[0])}
|
287 |
|
288 |
def y2_predict_lgb(*args):
|
@@ -309,7 +279,7 @@ def y2_predict_rf(*args):
|
|
309 |
def y3_predict_xgb(*args):
|
310 |
df3 = pd.DataFrame([args], columns=x3.columns)
|
311 |
df3 = df3.astype({col: "category" for col in categorical_columns3})
|
312 |
-
pos_pred = y3_model_xgb.
|
313 |
return {"Prolonged LOS": float(pos_pred[0]), "No Prolonged LOS": 1 - float(pos_pred[0])}
|
314 |
|
315 |
def y3_predict_lgb(*args):
|
@@ -336,7 +306,7 @@ def y3_predict_rf(*args):
|
|
336 |
def y4_predict_xgb(*args):
|
337 |
df4 = pd.DataFrame([args], columns=x4.columns)
|
338 |
df4 = df4.astype({col: "category" for col in categorical_columns4})
|
339 |
-
pos_pred = y4_model_xgb.
|
340 |
return {"Prolonged ICU LOS": float(pos_pred[0]), "No Prolonged ICU LOS": 1 - float(pos_pred[0])}
|
341 |
|
342 |
def y4_predict_lgb(*args):
|
@@ -363,7 +333,7 @@ def y4_predict_rf(*args):
|
|
363 |
def y5_predict_xgb(*args):
|
364 |
df5 = pd.DataFrame([args], columns=x5.columns)
|
365 |
df5 = df5.astype({col: "category" for col in categorical_columns5})
|
366 |
-
pos_pred = y5_model_xgb.
|
367 |
return {"Major Complications": float(pos_pred[0]), "No Major Complications": 1 - float(pos_pred[0])}
|
368 |
|
369 |
def y5_predict_lgb(*args):
|
|
|
140 |
#Prepare data for the outcome 1 (mortality).
# Split the target off x1, then integer-encode every object-dtype feature column.
y1 = x1.pop('OUTCOME')
categorical_columns1 = list(x1.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns1:
    # Encode the column from x1 itself. The previous `rf[col]` read from a name
    # that is not a feature frame (it is later bound to RandomForestClassifier).
    # NOTE(review): the single encoder is refit per column, so the per-column
    # mappings are not retained -- confirm inference inputs are encoded consistently.
    x1[col] = le.fit_transform(x1[col].astype(str))
# These two statements were fused on one line with a stale `x1_rf` assignment,
# which is a syntax error; they are now separate statements.
# NOTE(review): this casts every int64 column (including the freshly encoded
# ones) to str -- confirm the estimators trained on x1 accept that dtype.
d1 = dict.fromkeys(x1.select_dtypes(np.int64).columns, str)
x1 = x1.astype(d1)
# Keep x1_rf bound to the prepared frame: the fused statement assigned it and
# y1_predict_rf still reads x1_rf.columns.
x1_rf = x1
|
148 |
|
149 |
#Prepare data for the outcome 2 (discharge).
# Split the target off x2, then integer-encode every object-dtype feature column.
y2 = x2.pop('OUTCOME')
categorical_columns2 = list(x2.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns2:
    # Encode the column from x2 itself. The previous `rf[col]` read from a name
    # that is not a feature frame (it is later bound to RandomForestClassifier).
    # NOTE(review): the single encoder is refit per column, so the per-column
    # mappings are not retained -- confirm inference inputs are encoded consistently.
    x2[col] = le.fit_transform(x2[col].astype(str))
# These two statements were fused on one line with a stale `x2_rf` assignment,
# which is a syntax error; they are now separate statements.
# NOTE(review): this casts every int64 column (including the freshly encoded
# ones) to str -- confirm the estimators trained on x2 accept that dtype.
d2 = dict.fromkeys(x2.select_dtypes(np.int64).columns, str)
x2 = x2.astype(d2)
# Keep x2_rf bound to the prepared frame, since the fused statement assigned it.
x2_rf = x2
|
157 |
|
158 |
#Prepare data for the outcome 3 (LOS).
# Split the target off x3, then integer-encode every object-dtype feature column.
y3 = x3.pop('OUTCOME')
categorical_columns3 = list(x3.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns3:
    # Encode the column from x3 itself. The previous `rf[col]` read from a name
    # that is not a feature frame (it is later bound to RandomForestClassifier).
    # NOTE(review): the single encoder is refit per column, so the per-column
    # mappings are not retained -- confirm inference inputs are encoded consistently.
    x3[col] = le.fit_transform(x3[col].astype(str))
# These two statements were fused on one line with a stale `x3_rf` assignment,
# which is a syntax error; they are now separate statements.
# NOTE(review): this casts every int64 column (including the freshly encoded
# ones) to str -- confirm the estimators trained on x3 accept that dtype.
d3 = dict.fromkeys(x3.select_dtypes(np.int64).columns, str)
x3 = x3.astype(d3)
# Keep x3_rf bound to the prepared frame, since the fused statement assigned it.
x3_rf = x3
|
166 |
|
167 |
#Prepare data for the outcome 4 (ICU LOS).
# Split the target off x4, then integer-encode every object-dtype feature column.
y4 = x4.pop('OUTCOME')
categorical_columns4 = list(x4.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns4:
    # Encode the column from x4 itself. The previous `rf[col]` read from a name
    # that is not a feature frame (it is later bound to RandomForestClassifier).
    # NOTE(review): the single encoder is refit per column, so the per-column
    # mappings are not retained -- confirm inference inputs are encoded consistently.
    x4[col] = le.fit_transform(x4[col].astype(str))
# These two statements were fused on one line with a stale `x4_rf` assignment,
# which is a syntax error; they are now separate statements.
# NOTE(review): this casts every int64 column (including the freshly encoded
# ones) to str -- confirm the estimators trained on x4 accept that dtype.
d4 = dict.fromkeys(x4.select_dtypes(np.int64).columns, str)
x4 = x4.astype(d4)
# Keep x4_rf bound to the prepared frame, since the fused statement assigned it.
x4_rf = x4
|
175 |
|
176 |
#Prepare data for the outcome 5 (complications).
# Split the target off x5, then integer-encode every object-dtype feature column.
y5 = x5.pop('OUTCOME')
categorical_columns5 = list(x5.select_dtypes('object').columns)
le = sklearn.preprocessing.LabelEncoder()
for col in categorical_columns5:
    # Encode the column from x5 itself. The previous `rf[col]` read from a name
    # that is not a feature frame (it is later bound to RandomForestClassifier).
    # NOTE(review): the single encoder is refit per column, so the per-column
    # mappings are not retained -- confirm inference inputs are encoded consistently.
    x5[col] = le.fit_transform(x5[col].astype(str))
# These two statements were fused on one line with a stale `x5_rf` assignment,
# which is a syntax error; they are now separate statements.
# NOTE(review): this casts every int64 column (including the freshly encoded
# ones) to str -- confirm the estimators trained on x5 accept that dtype.
d5 = dict.fromkeys(x5.select_dtypes(np.int64).columns, str)
x5 = x5.astype(d5)
# Keep x5_rf bound to the prepared frame, since the fused statement assigned it.
x5_rf = x5
|
|
|
184 |
|
185 |
#Assign hyperparameters.
|
186 |
# Keyword arguments for the outcome 1 (mortality) model; unpacked into XGBClassifier in the training section.
# NOTE(review): 'eta' is on the order of 1e-07, an unusually small step size -- confirm it is the intended tuned value.
y1_params = {'objective': 'binary:logistic', 'booster': 'gbtree', 'lambda': 0.5059844209148782, 'alpha': 0.0030156848979492556, 'max_depth': 2, 'eta': 4.546875002603483e-07, 'gamma': 1.1982641538268563e-08, 'grow_policy': 'lossguide', 'eval_metric': 'auc', 'verbosity': 0, 'seed': 31}
|
|
|
192 |
|
193 |
|
194 |
#Training models.
# Each outcome gets one fitted estimator plus a SHAP TreeExplainer built from
# that same estimator. Imports are listed once instead of being repeated.
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier as rf
from lightgbm import LGBMClassifier

# Outcome 1 (mortality): XGBoost classifier.
y1_xgb = XGBClassifier(**y1_params)
y1_model_xgb = y1_xgb.fit(x1, y1)
# Explain the model fitted just above. This previously referenced
# y2_model_xgb, which this section never trains.
y1_explainer_xgb = shap.TreeExplainer(y1_model_xgb)

# Outcome 2 (discharge): random forest classifier.
y2_rf = rf(**y2_params)
y2_model_rf = y2_rf.fit(x2, y2)
y2_explainer_rf = shap.TreeExplainer(y2_model_rf)

# Outcome 3 (LOS): XGBoost classifier.
y3_xgb = XGBClassifier(**y3_params)
y3_model_xgb = y3_xgb.fit(x3, y3)
y3_explainer_xgb = shap.TreeExplainer(y3_model_xgb)

# Outcome 4 (ICU LOS): LightGBM classifier. The estimator is named y4_lgb
# (matching y1_xgb / y2_rf / y3_xgb) instead of `lgb`, so the name `lgb` is not
# rebound away from the lightgbm module alias.
y4_lgb = LGBMClassifier(**y4_params)
y4_model_lgb = y4_lgb.fit(x4, y4)
y4_explainer_lgb = shap.TreeExplainer(y4_model_lgb)

# Outcome 5 (complications): LightGBM classifier, named y5_lgb for the same reason.
y5_lgb = LGBMClassifier(**y5_params)
y5_model_lgb = y5_lgb.fit(x5, y5)
y5_explainer_lgb = shap.TreeExplainer(y5_model_lgb)
|
219 |
|
|
|
220 |
#Define predict for y1 (mortality).
|
221 |
def y1_predict_xgb(*args):
    """Return mortality class probabilities from the XGBoost model for one case.

    Args:
        *args: Feature values for a single case, in the column order of ``x1``.

    Returns:
        dict: ``"Mortality"`` mapped to the second ``predict_proba`` column and
        ``"No Mortality"`` mapped to the first, both as floats.
    """
    df1 = pd.DataFrame([args], columns=x1.columns)
    # Cast the row that was just built. The previous code called `df.astype`,
    # which referenced a different name and ignored the caller's input.
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    # Narrow any int64 columns to int32 before prediction.
    d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
    df1 = df1.astype(d1)
    pos_pred = y1_model_xgb.predict_proba(df1)
    return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
|
228 |
|
229 |
def y1_predict_lgb(*args):
    """Return mortality class probabilities from the LightGBM model for one case.

    Args:
        *args: Feature values for a single case, in the column order of ``x1``.

    Returns:
        dict: ``"Mortality"`` mapped to the second ``predict_proba`` column and
        ``"No Mortality"`` mapped to the first, both as floats.
    """
    df1 = pd.DataFrame([args], columns=x1.columns)
    # Cast the row that was just built. The previous code called `df.astype`,
    # which referenced a different name and ignored the caller's input.
    df1 = df1.astype({col: "category" for col in categorical_columns1})
    # Narrow any int64 columns to int32 before prediction.
    d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
    df1 = df1.astype(d1)
    pos_pred = y1_model_lgb.predict_proba(df1)
    return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
|
236 |
|
237 |
def y1_predict_cb(*args):
|
238 |
df1 = pd.DataFrame([args], columns=x1.columns)
|
|
|
244 |
df1 = pd.DataFrame([args], columns=x1_rf.columns)
|
245 |
df1 = df.astype({col: "category" for col in categorical_columns1})
|
246 |
d1 = dict.fromkeys(df1.select_dtypes(np.int64).columns, np.int32)
|
247 |
+
df1 = df1.astype(d1)
|
248 |
pos_pred = y1_model_rf.predict_proba(df1)
|
249 |
return {"Mortality": float(pos_pred[0][1]), "No Mortality": float(pos_pred[0][0])}
|
250 |
|
|
|
252 |
def y2_predict_xgb(*args):
    """Score one case with the outcome 2 (discharge) XGBoost model.

    The positional values are placed in a one-row frame using the columns of
    ``x2``, the columns named in ``categorical_columns2`` are cast to
    ``category``, and the frame is wrapped in a DMatrix for prediction.

    Returns:
        dict: "Facility Discharge" mapped to the first predicted value and
        "Home Discharge" mapped to its complement.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    category_dtypes = {name: "category" for name in categorical_columns2}
    row = row.astype(category_dtypes)
    features = xgb.DMatrix(row, enable_categorical=True)
    pos_pred = y2_model_xgb.predict(features)
    facility = float(pos_pred[0])
    return {"Facility Discharge": facility, "Home Discharge": 1 - facility}
|
257 |
|
258 |
def y2_predict_lgb(*args):
|
|
|
279 |
def y3_predict_xgb(*args):
    """Score one case with the outcome 3 (LOS) XGBoost model.

    The positional values are placed in a one-row frame using the columns of
    ``x3``, the columns named in ``categorical_columns3`` are cast to
    ``category``, and the frame is wrapped in a DMatrix for prediction.

    Returns:
        dict: "Prolonged LOS" mapped to the first predicted value and
        "No Prolonged LOS" mapped to its complement.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    category_dtypes = {name: "category" for name in categorical_columns3}
    row = row.astype(category_dtypes)
    features = xgb.DMatrix(row, enable_categorical=True)
    pos_pred = y3_model_xgb.predict(features)
    prolonged = float(pos_pred[0])
    return {"Prolonged LOS": prolonged, "No Prolonged LOS": 1 - prolonged}
|
284 |
|
285 |
def y3_predict_lgb(*args):
|
|
|
306 |
def y4_predict_xgb(*args):
    """Score one case with the outcome 4 (ICU LOS) XGBoost model.

    The positional values are placed in a one-row frame using the columns of
    ``x4``, the columns named in ``categorical_columns4`` are cast to
    ``category``, and the frame is wrapped in a DMatrix for prediction.

    Returns:
        dict: "Prolonged ICU LOS" mapped to the first predicted value and
        "No Prolonged ICU LOS" mapped to its complement.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    category_dtypes = {name: "category" for name in categorical_columns4}
    row = row.astype(category_dtypes)
    features = xgb.DMatrix(row, enable_categorical=True)
    pos_pred = y4_model_xgb.predict(features)
    prolonged = float(pos_pred[0])
    return {"Prolonged ICU LOS": prolonged, "No Prolonged ICU LOS": 1 - prolonged}
|
311 |
|
312 |
def y4_predict_lgb(*args):
|
|
|
333 |
def y5_predict_xgb(*args):
    """Score one case with the outcome 5 (complications) XGBoost model.

    The positional values are placed in a one-row frame using the columns of
    ``x5``, the columns named in ``categorical_columns5`` are cast to
    ``category``, and the frame is wrapped in a DMatrix for prediction.

    Returns:
        dict: "Major Complications" mapped to the first predicted value and
        "No Major Complications" mapped to its complement.
    """
    row = pd.DataFrame([args], columns=x5.columns)
    category_dtypes = {name: "category" for name in categorical_columns5}
    row = row.astype(category_dtypes)
    features = xgb.DMatrix(row, enable_categorical=True)
    pos_pred = y5_model_xgb.predict(features)
    complication = float(pos_pred[0])
    return {"Major Complications": complication, "No Major Complications": 1 - complication}
|
338 |
|
339 |
def y5_predict_lgb(*args):
|