simonduerr committed on
Commit
ff1a3bf
1 Parent(s): 5bb015b

Update inference_app.py

Browse files
Files changed (1) hide show
  1. inference_app.py +30 -18
inference_app.py CHANGED
@@ -87,7 +87,28 @@ def run_smina(
87
  )
88
  return output_text
89
 
90
- def predict (input_sequence, input_ligand, input_protein):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  start_time = time.time()
92
 
93
  if input_protein==None:
@@ -102,6 +123,7 @@ def predict (input_sequence, input_ligand, input_protein):
102
 
103
  os.system(f"obabel {input_protein.name} -xr -O /usr/src/app/receptor.pdbqt")
104
  os.system("obabel -isdf /usr/src/app/ligand.sdf -O /usr/src/app/ligand.pdbqt")
 
105
  #Find pocket
106
  pdb = md.load(input_protein.name)
107
  # run ligsite
@@ -110,23 +132,15 @@ def predict (input_sequence, input_ligand, input_protein):
110
  min_samples_value = 5
111
  dbscan = DBSCAN(eps=eps_value, min_samples=min_samples_value)
112
  labels = dbscan.fit_predict(pockets_xyz)
113
-
114
  # Find the unique clusters and their sizes
115
  unique_labels, counts = np.unique(labels, return_counts=True)
116
-
117
  # Exclude noise points
118
  valid_clusters = unique_labels[unique_labels != -1]
119
  valid_counts = counts[unique_labels != -1]
120
-
121
  # Find the cluster with the most points (highest density)
122
  densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
123
  densest_cluster_points = pockets_xyz[labels == densest_cluster_label]
124
-
125
- pocket_center = np.mean(densest_cluster_points, axis=0)
126
-
127
-
128
- import pandas as pd
129
-
130
  top_df = pd.DataFrame()
131
  top_df['serial'] = list(range(densest_cluster_points.shape[0]))
132
  top_df['name'] = 'PK'
@@ -134,24 +148,22 @@ def predict (input_sequence, input_ligand, input_protein):
134
  top_df['resSeq'] = list(range(densest_cluster_points.shape[0]))
135
  top_df['resName'] = 'PCK'
136
  top_df['chainID'] = 0
137
-
138
  pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
139
  pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
140
  pocket_trj.save('/usr/src/app/pockets_dense.pdb')
141
-
142
  parser = PDBParser()
143
  struc = parser.get_structure("X", "/usr/src/app/pockets_dense.pdb")
144
  coords = [x.coord for x in struc.get_atoms()]
145
  pocket_center = np.mean(coords, axis=0)
 
146
  output_text = run_smina(
147
  "/usr/src/app/ligand.pdbqt",
148
  "/usr/src/app/receptor.pdbqt",
149
  "/usr/src/app/docking_pose.sdf",
150
  pocket_center,
151
  [10,10,10],
152
- )
153
- os.system("cat /usr/src/app/docking_pose.sdf")
154
-
155
  end_time = time.time()
156
  run_time = end_time - start_time
157
  return [input_protein.name,"/usr/src/app/docking_pose.sdf"], run_time
@@ -170,7 +182,7 @@ with gr.Blocks() as app:
170
  # define any options here
171
 
172
  # for automated inference the default options are used
173
- # slider_option = gr.Slider(0,10, label="Slider Option")
174
  # checkbox_option = gr.Checkbox(label="Checkbox Option")
175
  # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")
176
 
@@ -181,7 +193,7 @@ with gr.Blocks() as app:
181
  [
182
  "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
183
  "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
184
- "test_out.pdb"
185
  ],
186
  ],
187
  [input_sequence, input_ligand, input_protein],
@@ -215,6 +227,6 @@ with gr.Blocks() as app:
215
  out = Molecule3D(reps=reps)
216
  run_time = gr.Textbox(label="Runtime")
217
 
218
- btn.click(predict, inputs=[input_sequence, input_ligand, input_protein], outputs=[out, run_time])
219
 
220
  app.launch()
 
87
  )
88
  return output_text
89
 
90
+ def predict (input_sequence, input_ligand, input_protein, exhaustiveness):
91
+ """
92
+ Main prediction function that calls ligsite and smina
93
+
94
+ Parameters
95
+ ----------
96
+ input_sequence: str
97
+ monomer sequence
98
+ input_ligand: str
99
+ ligand as SMILES string
100
+ input_protein: gradio.File
101
+ Gradio file object to monomer protein structure as PDB
102
+ exhaustiveness: int
103
+ SMINA parameter
104
+
105
+ Returns
106
+ -------
107
+ output_structures: tuple
108
+ (output_protein, output_ligand_sdf)
109
+ run_time: float
110
+ run time of the program
111
+ """
112
  start_time = time.time()
113
 
114
  if input_protein==None:
 
123
 
124
  os.system(f"obabel {input_protein.name} -xr -O /usr/src/app/receptor.pdbqt")
125
  os.system("obabel -isdf /usr/src/app/ligand.sdf -O /usr/src/app/ligand.pdbqt")
126
+
127
  #Find pocket
128
  pdb = md.load(input_protein.name)
129
  # run ligsite
 
132
  min_samples_value = 5
133
  dbscan = DBSCAN(eps=eps_value, min_samples=min_samples_value)
134
  labels = dbscan.fit_predict(pockets_xyz)
 
135
  # Find the unique clusters and their sizes
136
  unique_labels, counts = np.unique(labels, return_counts=True)
 
137
  # Exclude noise points
138
  valid_clusters = unique_labels[unique_labels != -1]
139
  valid_counts = counts[unique_labels != -1]
 
140
  # Find the cluster with the most points (highest density)
141
  densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
142
  densest_cluster_points = pockets_xyz[labels == densest_cluster_label]
143
+ # write cluster to PDB
 
 
 
 
 
144
  top_df = pd.DataFrame()
145
  top_df['serial'] = list(range(densest_cluster_points.shape[0]))
146
  top_df['name'] = 'PK'
 
148
  top_df['resSeq'] = list(range(densest_cluster_points.shape[0]))
149
  top_df['resName'] = 'PCK'
150
  top_df['chainID'] = 0
 
151
  pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
152
  pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
153
  pocket_trj.save('/usr/src/app/pockets_dense.pdb')
 
154
  parser = PDBParser()
155
  struc = parser.get_structure("X", "/usr/src/app/pockets_dense.pdb")
156
  coords = [x.coord for x in struc.get_atoms()]
157
  pocket_center = np.mean(coords, axis=0)
158
+ # run smina
159
  output_text = run_smina(
160
  "/usr/src/app/ligand.pdbqt",
161
  "/usr/src/app/receptor.pdbqt",
162
  "/usr/src/app/docking_pose.sdf",
163
  pocket_center,
164
  [10,10,10],
165
+ exhaustiveness=exhaustiveness
166
+ )
 
167
  end_time = time.time()
168
  run_time = end_time - start_time
169
  return [input_protein.name,"/usr/src/app/docking_pose.sdf"], run_time
 
182
  # define any options here
183
 
184
  # for automated inference the default options are used
185
+ exhaustiveness = gr.Slider(1,10,value=1, label="Slider Option")
186
  # checkbox_option = gr.Checkbox(label="Checkbox Option")
187
  # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")
188
 
 
193
  [
194
  "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
195
  "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
196
+ "input_test.pdb"
197
  ],
198
  ],
199
  [input_sequence, input_ligand, input_protein],
 
227
  out = Molecule3D(reps=reps)
228
  run_time = gr.Textbox(label="Runtime")
229
 
230
+ btn.click(predict, inputs=[input_sequence, input_ligand, input_protein, exhaustiveness], outputs=[out, run_time])
231
 
232
  app.launch()