simonduerr committed on
Commit
e37d702
1 Parent(s): 2818331

Update inference_app.py

Browse files
Files changed (1) hide show
  1. inference_app.py +139 -3
inference_app.py CHANGED
@@ -5,16 +5,152 @@ import gradio as gr
5
 
6
  from gradio_molecule3d import Molecule3D
7
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def predict (input_sequence, input_ligand, input_protein):
12
  start_time = time.time()
13
- # Do inference here
14
- # return an output pdb file with the protein and ligand with resname LIG or UNK.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  end_time = time.time()
16
  run_time = end_time - start_time
17
- return "test_out.pdb", run_time
18
 
19
  with gr.Blocks() as app:
20
 
 
5
 
6
  from gradio_molecule3d import Molecule3D
7
 
8
+ import sys
9
+ import os
10
+ import os
11
+ import numpy as np
12
+ from rdkit import Chem
13
+ from rdkit.Chem import AllChem
14
+ from rdkit.Chem import Draw
15
+ from rdkit.Chem.Draw import IPythonConsole
16
+ from rdkit.Chem import DataStructs
17
+ from rdkit.Chem import RDConfig
18
+ from rdkit.Chem import rdBase
19
+ import pickle
20
 
21
+ from Bio.PDB import *
22
+ import requests
23
+ import subprocess
24
 
25
+ import mdtraj as md
26
+ from enspara import geometry
27
+ from sklearn.cluster import DBSCAN
28
+ import pandas as pd
29
+
30
def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Dock a ligand into a receptor by invoking the bundled Smina binary.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Ligand PDBQT file to dock.
    protein_path: str or pathlib.Path
        Receptor PDBQT file to dock into.
    out_path: str or pathlib.Path
        Destination for the docking poses (SDF or PDB format).
    pocket_center: iterable of float or int
        x, y, z coordinates of the search box center.
    pocket_size: iterable of float or int
        x, y, z edge lengths of the search box.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of the docking calculation.

    Returns
    -------
    output_text: str
        Everything Smina printed to standard output.

    Raises
    ------
    subprocess.CalledProcessError
        If the Smina process exits with a non-zero status.
    """
    # The executable is resolved relative to the current working directory.
    command = ["./smina.static"]
    command += ["--ligand", str(ligand_path)]
    command += ["--receptor", str(protein_path)]
    command += ["--out", str(out_path)]

    # Index explicitly (instead of zipping) so that a coordinate iterable
    # with fewer than three entries still fails loudly with IndexError.
    for index, axis in enumerate("xyz"):
        command += [f"--center_{axis}", str(pocket_center[index])]
    for index, axis in enumerate("xyz"):
        command += [f"--size_{axis}", str(pocket_size[index])]

    command += ["--num_modes", str(num_poses)]
    command += ["--exhaustiveness", str(exhaustiveness)]

    # universal_newlines=True makes check_output return str instead of bytes.
    return subprocess.check_output(command, universal_newlines=True)
87
 
88
  def predict (input_sequence, input_ligand, input_protein):
89
  start_time = time.time()
90
+
91
+ m=Chem.MolFromSmiles(input_ligand)
92
+
93
+ m2=Chem.AddHs(m)
94
+ AllChem.EmbedMolecule(m2)
95
+ AllChem.MMFFOptimizeMolecule(m2)
96
+
97
+ Chem.SDWriter("ligand.sdf").write(m2)
98
+
99
+ os.system(f"obabel {input_protein.name} -xr -O receptor.pdbqt")
100
+ os.system("obabel -isdf ligand.sdf -O ligand.pdbqt")
101
+ #Find pocket
102
+ pdb = md.load('receptor.pdb')
103
+ # run ligsite
104
+ pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
105
+ eps_value = 0.15
106
+ min_samples_value = 5
107
+ dbscan = DBSCAN(eps=eps_value, min_samples=min_samples_value)
108
+ labels = dbscan.fit_predict(pockets_xyz)
109
+
110
+ # Find the unique clusters and their sizes
111
+ unique_labels, counts = np.unique(labels, return_counts=True)
112
+
113
+ # Exclude noise points
114
+ valid_clusters = unique_labels[unique_labels != -1]
115
+ valid_counts = counts[unique_labels != -1]
116
+
117
+ # Find the cluster with the most points (highest density)
118
+ densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
119
+ densest_cluster_points = pockets_xyz[labels == densest_cluster_label]
120
+
121
+ pocket_center = np.mean(densest_cluster_points, axis=0)
122
+
123
+
124
+ import pandas as pd
125
+
126
+ top_df = pd.DataFrame()
127
+ top_df['serial'] = list(range(densest_cluster_points.shape[0]))
128
+ top_df['name'] = 'PK'
129
+ top_df['element'] = 'H'
130
+ top_df['resSeq'] = list(range(densest_cluster_points.shape[0]))
131
+ top_df['resName'] = 'PCK'
132
+ top_df['chainID'] = 0
133
+
134
+ pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
135
+ pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
136
+ pocket_trj.save('./pockets_dense.pdb')
137
+
138
+ parser = PDBParser()
139
+ struc = parser.get_structure("X", "pockets_dense.pdb")
140
+ coords = [x.coord for x in struc.get_atoms()]
141
+ pocket_center = np.mean(coords, axis=0)
142
+ output_text = run_smina(
143
+ "ligand.pdbqt",
144
+ "receptor.pdbqt",
145
+ "docking_pose.pdb",
146
+ pocket_center,
147
+ [10,10,10],
148
+ )
149
+ os.system("pdb_rplresname -UNL:LIG docked_pose.pdb")
150
+ os.system("pdb_merge receptor.pdb docked_pose.pdb > output.pdb")
151
  end_time = time.time()
152
  run_time = end_time - start_time
153
+ return "output.pdb", run_time
154
 
155
  with gr.Blocks() as app:
156